translit.cpp revision b13da9df870a61b11249bf741347908dbea0edd8
1b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru/* 2b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru ********************************************************************** 3b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * Copyright (C) 1999-2007, International Business Machines 4b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * Corporation and others. All Rights Reserved. 5b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru ********************************************************************** 6b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * Date Name Description 7b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * 11/17/99 aliu Creation. 8b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru ********************************************************************** 9b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru */ 10b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 11b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru#include "unicode/utypes.h" 12b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 13b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru#if !UCONFIG_NO_TRANSLITERATION 14b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 15b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru#include "unicode/putil.h" 16b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru#include "unicode/translit.h" 17b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru#include "unicode/locid.h" 18b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru#include "unicode/msgfmt.h" 19b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru#include "unicode/rep.h" 20b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru#include "unicode/resbund.h" 21b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru#include "unicode/unifilt.h" 22b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru#include "unicode/uniset.h" 23b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru#include "unicode/uscript.h" 24b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru#include "unicode/strenum.h" 25b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru#include "cpdtrans.h" 26b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru#include "nultrans.h" 27b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru#include "rbt_data.h" 28b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru#include "rbt_pars.h" 29b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru#include "rbt.h" 30b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru#include "transreg.h" 31b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru#include "name2uni.h" 32b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru#include "nortrans.h" 33b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru#include "remtrans.h" 34b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru#include "titletrn.h" 35b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru#include "tolowtrn.h" 36b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru#include "toupptrn.h" 37b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru#include "uni2name.h" 38b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru#include "esctrn.h" 39b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru#include "unesctrn.h" 40b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru#include "tridpars.h" 41b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru#include "anytrans.h" 42b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru#include "util.h" 43b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru#include "hash.h" 44b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru#include "mutex.h" 45b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru#include "ucln_in.h" 46b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru#include "uassert.h" 47b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru#include "cmemory.h" 48b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru#include "cstring.h" 49b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru#include "uinvchar.h" 50b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 51b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Querustatic const UChar TARGET_SEP = 0x002D; /*-*/ 52b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Querustatic const UChar ID_DELIM = 0x003B; /*;*/ 53b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Querustatic const UChar VARIANT_SEP = 0x002F; // '/' 54b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 55b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru/** 56b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * Prefix for resource bundle key for the display name for a 57b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * transliterator. The ID is appended to this to form the key. 58b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * The resource bundle value should be a String. 59b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru */ 60b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Querustatic const char RB_DISPLAY_NAME_PREFIX[] = "%Translit%%"; 61b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 62b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru/** 63b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * Prefix for resource bundle key for the display name for a 64b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * transliterator SCRIPT. The ID is appended to this to form the key. 65b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * The resource bundle value should be a String. 66b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru */ 67b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Querustatic const char RB_SCRIPT_DISPLAY_NAME_PREFIX[] = "%Translit%"; 68b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 69b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru/** 70b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * Resource bundle key for display name pattern. 71b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * The resource bundle value should be a String forming a 72b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * MessageFormat pattern, e.g.: 73b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * "{0,choice,0#|1#{1} Transliterator|2#{1} to {2} Transliterator}". 74b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru */ 75b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Querustatic const char RB_DISPLAY_NAME_PATTERN[] = "TransliteratorNamePattern"; 76b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 77b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru/** 78b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * Resource bundle key for the list of RuleBasedTransliterator IDs. 79b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * The resource bundle value should be a String[] with each element 80b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * being a valid ID. The ID will be appended to RB_RULE_BASED_PREFIX 81b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * to obtain the class name in which the RB_RULE key will be sought. 82b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru */ 83b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Querustatic const char RB_RULE_BASED_IDS[] = "RuleBasedTransliteratorIDs"; 84b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 85b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru/** 86b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * The mutex controlling access to registry object. 87b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru */ 88b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Querustatic UMTX registryMutex = 0; 89b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 90b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru/** 91b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * System transliterator registry; non-null when initialized. 92b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru */ 93b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Querustatic U_NAMESPACE_QUALIFIER TransliteratorRegistry* registry = 0; 94b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 95b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru// Macro to check/initialize the registry. ONLY USE WITHIN 96b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru// MUTEX. Avoids function call when registry is initialized. 97b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru#define HAVE_REGISTRY (registry!=0 || initializeRegistry()) 98b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 99b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru// Empty string 100b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Querustatic const UChar EMPTY[] = {0}; //"" 101b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 102b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste QueruU_NAMESPACE_BEGIN 103b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 104b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste QueruUOBJECT_DEFINE_ABSTRACT_RTTI_IMPLEMENTATION(Transliterator) 105b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 106b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru/** 107b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * Return TRUE if the given UTransPosition is valid for text of 108b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * the given length. 109b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru */ 110b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Querustatic inline UBool positionIsValid(UTransPosition& index, int32_t len) { 111b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru return !(index.contextStart < 0 || 112b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru index.start < index.contextStart || 113b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru index.limit < index.start || 114b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru index.contextLimit < index.limit || 115b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru len < index.contextLimit); 116b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru} 117b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 118b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru/** 119b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * Default constructor. 120b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * @param theID the string identifier for this transliterator 121b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * @param theFilter the filter. Any character for which 122b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * <tt>filter.contains()</tt> returns <tt>FALSE</tt> will not be 123b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * altered by this transliterator. If <tt>filter</tt> is 124b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * <tt>null</tt> then no filtering is applied. 125b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru */ 126b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste QueruTransliterator::Transliterator(const UnicodeString& theID, 127b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru UnicodeFilter* adoptedFilter) : 128b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru UObject(), ID(theID), filter(adoptedFilter), 129b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru maximumContextLength(0) 130b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru{ 131b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru // NUL-terminate the ID string, which is a non-aliased copy. 132b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru ID.append((UChar)0); 133b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru ID.truncate(ID.length()-1); 134b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru} 135b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 136b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru/** 137b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * Destructor. 138b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru */ 139b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste QueruTransliterator::~Transliterator() { 140b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru if (filter) { 141b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru delete filter; 142b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 143b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru} 144b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 145b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru/** 146b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * Copy constructor. 147b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru */ 148b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste QueruTransliterator::Transliterator(const Transliterator& other) : 149b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru UObject(other), ID(other.ID), filter(0), 150b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru maximumContextLength(other.maximumContextLength) 151b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru{ 152b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru // NUL-terminate the ID string, which is a non-aliased copy. 153b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru ID.append((UChar)0); 154b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru ID.truncate(ID.length()-1); 155b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 156b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru if (other.filter != 0) { 157b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru // We own the filter, so we must have our own copy 158b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru filter = (UnicodeFilter*) other.filter->clone(); 159b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 160b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru} 161b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 162b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste QueruTransliterator* Transliterator::clone() const { 163b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru return NULL; 164b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru} 165b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 166b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru/** 167b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * Assignment operator. 168b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru */ 169b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste QueruTransliterator& Transliterator::operator=(const Transliterator& other) { 170b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru ID = other.ID; 171b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru // NUL-terminate the ID string 172b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru ID.getTerminatedBuffer(); 173b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 174b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru maximumContextLength = other.maximumContextLength; 175b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru adoptFilter((other.filter == 0) ? 0 : (UnicodeFilter*) other.filter->clone()); 176b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru return *this; 177b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru} 178b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 179b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru/** 180b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * Transliterates a segment of a string. <code>Transliterator</code> API. 181b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * @param text the string to be transliterated 182b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * @param start the beginning index, inclusive; <code>0 <= start 183b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * <= limit</code>. 184b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * @param limit the ending index, exclusive; <code>start <= limit 185b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * <= text.length()</code>. 186b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * @return the new limit index, or -1 187b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru */ 188b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queruint32_t Transliterator::transliterate(Replaceable& text, 189b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru int32_t start, int32_t limit) const { 190b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru if (start < 0 || 191b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru limit < start || 192b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru text.length() < limit) { 193b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru return -1; 194b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 195b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 196b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru UTransPosition offsets; 197b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru offsets.contextStart= start; 198b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru offsets.contextLimit = limit; 199b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru offsets.start = start; 200b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru offsets.limit = limit; 201b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru filteredTransliterate(text, offsets, FALSE, TRUE); 202b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru return offsets.limit; 203b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru} 204b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 205b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru/** 206b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * Transliterates an entire string in place. Convenience method. 207b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * @param text the string to be transliterated 208b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru */ 209b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queruvoid Transliterator::transliterate(Replaceable& text) const { 210b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru transliterate(text, 0, text.length()); 211b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru} 212b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 213b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru/** 214b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * Transliterates the portion of the text buffer that can be 215b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * transliterated unambiguosly after new text has been inserted, 216b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * typically as a result of a keyboard event. The new text in 217b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * <code>insertion</code> will be inserted into <code>text</code> 218b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * at <code>index.contextLimit</code>, advancing 219b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * <code>index.contextLimit</code> by <code>insertion.length()</code>. 220b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * Then the transliterator will try to transliterate characters of 221b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * <code>text</code> between <code>index.start</code> and 222b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * <code>index.contextLimit</code>. Characters before 223b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * <code>index.start</code> will not be changed. 224b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * 225b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * <p>Upon return, values in <code>index</code> will be updated. 226b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * <code>index.contextStart</code> will be advanced to the first 227b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * character that future calls to this method will read. 228b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * <code>index.start</code> and <code>index.contextLimit</code> will 229b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * be adjusted to delimit the range of text that future calls to 230b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * this method may change. 231b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * 232b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * <p>Typical usage of this method begins with an initial call 233b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * with <code>index.contextStart</code> and <code>index.contextLimit</code> 234b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * set to indicate the portion of <code>text</code> to be 235b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * transliterated, and <code>index.start == index.contextStart</code>. 236b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * Thereafter, <code>index</code> can be used without 237b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * modification in future calls, provided that all changes to 238b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * <code>text</code> are made via this method. 239b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * 240b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * <p>This method assumes that future calls may be made that will 241b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * insert new text into the buffer. As a result, it only performs 242b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * unambiguous transliterations. After the last call to this 243b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * method, there may be untransliterated text that is waiting for 244b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * more input to resolve an ambiguity. In order to perform these 245b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * pending transliterations, clients should call {@link 246b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * #finishKeyboardTransliteration} after the last call to this 247b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * method has been made. 248b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * 249b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * @param text the buffer holding transliterated and untransliterated text 250b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * @param index an array of three integers. 251b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * 252b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * <ul><li><code>index.contextStart</code>: the beginning index, 253b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * inclusive; <code>0 <= index.contextStart <= index.contextLimit</code>. 254b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * 255b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * <li><code>index.contextLimit</code>: the ending index, exclusive; 256b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * <code>index.contextStart <= index.contextLimit <= text.length()</code>. 257b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * <code>insertion</code> is inserted at 258b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * <code>index.contextLimit</code>. 259b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * 260b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * <li><code>index.start</code>: the next character to be 261b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * considered for transliteration; <code>index.contextStart <= 262b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * index.start <= index.contextLimit</code>. Characters before 263b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * <code>index.start</code> will not be changed by future calls 264b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * to this method.</ul> 265b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * 266b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * @param insertion text to be inserted and possibly 267b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * transliterated into the translation buffer at 268b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * <code>index.contextLimit</code>. If <code>null</code> then no text 269b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * is inserted. 270b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * @see #START 271b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * @see #LIMIT 272b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * @see #CURSOR 273b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * @see #handleTransliterate 274b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * @exception IllegalArgumentException if <code>index</code> 275b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * is invalid 276b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru */ 277b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queruvoid Transliterator::transliterate(Replaceable& text, 278b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru UTransPosition& index, 279b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru const UnicodeString& insertion, 280b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru UErrorCode &status) const { 281b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru _transliterate(text, index, &insertion, status); 282b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru} 283b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 284b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru/** 285b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * Transliterates the portion of the text buffer that can be 286b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * transliterated unambiguosly after a new character has been 287b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * inserted, typically as a result of a keyboard event. This is a 288b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * convenience method; see {@link 289b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * #transliterate(Replaceable, int[], String)} for details. 290b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * @param text the buffer holding transliterated and 291b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * untransliterated text 292b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * @param index an array of three integers. See {@link 293b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * #transliterate(Replaceable, int[], String)}. 294b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * @param insertion text to be inserted and possibly 295b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * transliterated into the translation buffer at 296b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * <code>index.contextLimit</code>. 297b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * @see #transliterate(Replaceable, int[], String) 298b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru */ 299b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queruvoid Transliterator::transliterate(Replaceable& text, 300b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru UTransPosition& index, 301b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru UChar32 insertion, 302b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru UErrorCode& status) const { 303b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru UnicodeString str(insertion); 304b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru _transliterate(text, index, &str, status); 305b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru} 306b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 307b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru/** 308b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * Transliterates the portion of the text buffer that can be 309b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * transliterated unambiguosly. This is a convenience method; see 310b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * {@link #transliterate(Replaceable, int[], String)} for 311b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * details. 312b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * @param text the buffer holding transliterated and 313b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * untransliterated text 314b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * @param index an array of three integers. See {@link 315b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * #transliterate(Replaceable, int[], String)}. 316b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * @see #transliterate(Replaceable, int[], String) 317b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru */ 318b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queruvoid Transliterator::transliterate(Replaceable& text, 319b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru UTransPosition& index, 320b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru UErrorCode& status) const { 321b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru _transliterate(text, index, 0, status); 322b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru} 323b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 324b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru/** 325b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * Finishes any pending transliterations that were waiting for 326b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * more characters. Clients should call this method as the last 327b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * call after a sequence of one or more calls to 328b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * <code>transliterate()</code>. 329b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * @param text the buffer holding transliterated and 330b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * untransliterated text. 331b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * @param index the array of indices previously passed to {@link 332b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * #transliterate} 333b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru */ 334b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queruvoid Transliterator::finishTransliteration(Replaceable& text, 335b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru UTransPosition& index) const { 336b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru if (!positionIsValid(index, text.length())) { 337b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru return; 338b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 339b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 340b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru filteredTransliterate(text, index, FALSE, TRUE); 341b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru} 342b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 343b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru/** 344b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * This internal method does keyboard transliteration. If the 345b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * 'insertion' is non-null then we append it to 'text' before 346b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * proceeding. This method calls through to the pure virtual 347b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * framework method handleTransliterate() to do the actual 348b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * work. 349b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru */ 350b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queruvoid Transliterator::_transliterate(Replaceable& text, 351b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru UTransPosition& index, 352b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru const UnicodeString* insertion, 353b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru UErrorCode &status) const { 354b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru if (U_FAILURE(status)) { 355b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru return; 356b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 357b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 358b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru if (!positionIsValid(index, text.length())) { 359b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru status = U_ILLEGAL_ARGUMENT_ERROR; 360b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru return; 361b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 362b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 363b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru// int32_t originalStart = index.contextStart; 364b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru if (insertion != 0) { 365b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru text.handleReplaceBetween(index.limit, index.limit, *insertion); 366b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru index.limit += insertion->length(); 367b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru index.contextLimit += insertion->length(); 368b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 369b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 370b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru if (index.limit > 0 && 371b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru UTF_IS_LEAD(text.charAt(index.limit - 1))) { 372b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru // Oops, there is a dangling lead surrogate in the buffer. 373b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru // This will break most transliterators, since they will 374b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru // assume it is part of a pair. Don't transliterate until 375b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru // more text comes in. 376b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru return; 377b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 378b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 379b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru filteredTransliterate(text, index, TRUE, TRUE); 380b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 381b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru#if 0 382b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru // TODO 383b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru // I CAN'T DO what I'm attempting below now that the Kleene star 384b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru // operator is supported. For example, in the rule 385b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 386b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru // ([:Lu:]+) { x } > $1; 387b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 388b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru // what is the maximum context length? getMaximumContextLength() 389b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru // will return 1, but this is just the length of the ante context 390b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru // part of the pattern string -- 1 character, which is a standin 391b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru // for a Quantifier, which contains a StringMatcher, which 392b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru // contains a UnicodeSet. 393b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 394b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru // There is a complicated way to make this work again, and that's 395b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru // to add a "maximum left context" protocol into the 396b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru // UnicodeMatcher hierarchy. At present I'm not convinced this is 397b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru // worth it. 398b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 399b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru // --- 400b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 401b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru // The purpose of the code below is to keep the context small 402b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru // while doing incremental transliteration. When part of the left 403b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru // context (between contextStart and start) is no longer needed, 404b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru // we try to advance contextStart past that portion. We use the 405b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru // maximum context length to do so. 406b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru int32_t newCS = index.start; 407b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru int32_t n = getMaximumContextLength(); 408b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru while (newCS > originalStart && n-- > 0) { 409b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru --newCS; 410b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru newCS -= UTF_CHAR_LENGTH(text.char32At(newCS)) - 1; 411b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 412b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru index.contextStart = uprv_max(newCS, originalStart); 413b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru#endif 414b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru} 415b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 416b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru/** 417b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * This method breaks up the input text into runs of unfiltered 418b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * characters. It passes each such run to 419b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * <subclass>.handleTransliterate(). Subclasses that can handle the 420b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * filter logic more efficiently themselves may override this method. 421b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * 422b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * All transliteration calls in this class go through this method. 423b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru */ 424b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queruvoid Transliterator::filteredTransliterate(Replaceable& text, 425b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru UTransPosition& index, 426b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru UBool incremental, 427b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru UBool rollback) const { 428b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru // Short circuit path for transliterators with no filter in 429b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru // non-incremental mode. 430b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru if (filter == 0 && !rollback) { 431b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru handleTransliterate(text, index, incremental); 432b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru return; 433b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 434b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 435b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru //---------------------------------------------------------------------- 436b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru // This method processes text in two groupings: 437b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru // 438b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru // RUNS -- A run is a contiguous group of characters which are contained 439b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru // in the filter for this transliterator (filter.contains(ch) == TRUE). 440b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru // Text outside of runs may appear as context but it is not modified. 441b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru // The start and limit Position values are narrowed to each run. 442b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru // 443b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru // PASSES (incremental only) -- To make incremental mode work correctly, 444b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru // each run is broken up into n passes, where n is the length (in code 445b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru // points) of the run. Each pass contains the first n characters. If a 446b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru // pass is completely transliterated, it is committed, and further passes 447b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru // include characters after the committed text. If a pass is blocked, 448b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru // and does not transliterate completely, then this method rolls back 449b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru // the changes made during the pass, extends the pass by one code point, 450b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru // and tries again. 451b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru //---------------------------------------------------------------------- 452b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 453b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru // globalLimit is the limit value for the entire operation. We 454b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru // set index.limit to the end of each unfiltered run before 455b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru // calling handleTransliterate(), so we need to maintain the real 456b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru // value of index.limit here. After each transliteration, we 457b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru // update globalLimit for insertions or deletions that have 458b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru // happened. 459b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru int32_t globalLimit = index.limit; 460b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 461b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru // If there is a non-null filter, then break the input text up. Say the 462b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru // input text has the form: 463b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru // xxxabcxxdefxx 464b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru // where 'x' represents a filtered character (filter.contains('x') == 465b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru // false). Then we break this up into: 466b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru // xxxabc xxdef xx 467b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru // Each pass through the loop consumes a run of filtered 468b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru // characters (which are ignored) and a subsequent run of 469b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru // unfiltered characters (which are transliterated). 470b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 471b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru for (;;) { 472b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 473b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru if (filter != NULL) { 474b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru // Narrow the range to be transliterated to the first segment 475b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru // of unfiltered characters at or after index.start. 476b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 477b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru // Advance past filtered chars 478b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru UChar32 c; 479b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru while (index.start < globalLimit && 480b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru !filter->contains(c=text.char32At(index.start))) { 481b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru index.start += UTF_CHAR_LENGTH(c); 482b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 483b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 484b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru // Find the end of this run of unfiltered chars 485b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru index.limit = index.start; 486b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru while (index.limit < globalLimit && 487b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru filter->contains(c=text.char32At(index.limit))) { 488b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru index.limit += UTF_CHAR_LENGTH(c); 489b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 490b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 491b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 492b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru // Check to see if the unfiltered run is empty. This only 493b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru // happens at the end of the string when all the remaining 494b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru // characters are filtered. 495b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru if (index.limit == index.start) { 496b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru // assert(index.start == globalLimit); 497b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru break; 498b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 499b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 500b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru // Is this run incremental? If there is additional 501b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru // filtered text (if limit < globalLimit) then we pass in 502b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru // an incremental value of FALSE to force the subclass to 503b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru // complete the transliteration for this run. 504b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru UBool isIncrementalRun = 505b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru (index.limit < globalLimit ? FALSE : incremental); 506b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 507b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru int32_t delta; 508b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 509b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru // Implement rollback. To understand the need for rollback, 510b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru // consider the following transliterator: 511b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru // 512b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru // "t" is "a > A;" 513b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru // "u" is "A > b;" 514b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru // "v" is a compound of "t; NFD; u" with a filter [:Ll:] 515b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru // 516b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru // Now apply "c" to the input text "a". The result is "b". But if 517b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru // the transliteration is done incrementally, then the NFD holds 518b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru // things up after "t" has already transformed "a" to "A". When 519b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru // finishTransliterate() is called, "A" is _not_ processed because 520b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru // it gets excluded by the [:Ll:] filter, and the end result is "A" 521b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru // -- incorrect. The problem is that the filter is applied to a 522b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru // partially-transliterated result, when we only want it to apply to 523b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru // input text. Although this example hinges on a compound 524b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru // transliterator containing NFD and a specific filter, it can 525b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru // actually happen with any transliterator which may do a partial 526b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru // transformation in incremental mode into characters outside its 527b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru // filter. 528b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru // 529b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru // To handle this, when in incremental mode we supply characters to 530b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru // handleTransliterate() in several passes. Each pass adds one more 531b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru // input character to the input text. That is, for input "ABCD", we 532b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru // first try "A", then "AB", then "ABC", and finally "ABCD". If at 533b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru // any point we block (upon return, start < limit) then we roll 534b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru // back. If at any point we complete the run (upon return start == 535b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru // limit) then we commit that run. 536b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 537b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru if (rollback && isIncrementalRun) { 538b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 539b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru int32_t runStart = index.start; 540b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru int32_t runLimit = index.limit; 541b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru int32_t runLength = runLimit - runStart; 542b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 543b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru // Make a rollback copy at the end of the string 544b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru int32_t rollbackOrigin = text.length(); 545b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru text.copy(runStart, runLimit, rollbackOrigin); 546b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 547b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru // Variables reflecting the commitment of completely 548b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru // transliterated text. passStart is the runStart, advanced 549b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru // past committed text. rollbackStart is the rollbackOrigin, 550b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru // advanced past rollback text that corresponds to committed 551b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru // text. 552b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru int32_t passStart = runStart; 553b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru int32_t rollbackStart = rollbackOrigin; 554b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 555b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru // The limit for each pass; we advance by one code point with 556b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru // each iteration. 557b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru int32_t passLimit = index.start; 558b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 559b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru // Total length, in 16-bit code units, of uncommitted text. 560b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru // This is the length to be rolled back. 561b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru int32_t uncommittedLength = 0; 562b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 563b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru // Total delta (change in length) for all passes 564b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru int32_t totalDelta = 0; 565b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 566b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru // PASS MAIN LOOP -- Start with a single character, and extend 567b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru // the text by one character at a time. Roll back partial 568b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru // transliterations and commit complete transliterations. 569b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru for (;;) { 570b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru // Length of additional code point, either one or two 571b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru int32_t charLength = 572b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru UTF_CHAR_LENGTH(text.char32At(passLimit)); 573b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru passLimit += charLength; 574b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru if (passLimit > runLimit) { 575b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru break; 576b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 577b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru uncommittedLength += charLength; 578b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 579b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru index.limit = passLimit; 580b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 581b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru // Delegate to subclass for actual transliteration. Upon 582b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru // return, start will be updated to point after the 583b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru // transliterated text, and limit and contextLimit will be 584b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru // adjusted for length changes. 585b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru handleTransliterate(text, index, TRUE); 586b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 587b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru delta = index.limit - passLimit; // change in length 588b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 589b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru // We failed to completely transliterate this pass. 590b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru // Roll back the text. Indices remain unchanged; reset 591b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru // them where necessary. 592b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru if (index.start != index.limit) { 593b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru // Find the rollbackStart, adjusted for length changes 594b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru // and the deletion of partially transliterated text. 595b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru int32_t rs = rollbackStart + delta - (index.limit - passStart); 596b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 597b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru // Delete the partially transliterated text 598b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru text.handleReplaceBetween(passStart, index.limit, EMPTY); 599b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 600b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru // Copy the rollback text back 601b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru text.copy(rs, rs + uncommittedLength, passStart); 602b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 603b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru // Restore indices to their original values 604b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru index.start = passStart; 605b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru index.limit = passLimit; 606b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru index.contextLimit -= delta; 607b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 608b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 609b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru // We did completely transliterate this pass. Update the 610b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru // commit indices to record how far we got. Adjust indices 611b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru // for length change. 612b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru else { 613b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru // Move the pass indices past the committed text. 614b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru passStart = passLimit = index.start; 615b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 616b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru // Adjust the rollbackStart for length changes and move 617b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru // it past the committed text. All characters we've 618b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru // processed to this point are committed now, so zero 619b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru // out the uncommittedLength. 620b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru rollbackStart += delta + uncommittedLength; 621b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru uncommittedLength = 0; 622b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 623b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru // Adjust indices for length changes. 624b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru runLimit += delta; 625b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru totalDelta += delta; 626b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 627b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 628b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 629b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru // Adjust overall limit and rollbackOrigin for insertions and 630b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru // deletions. Don't need to worry about contextLimit because 631b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru // handleTransliterate() maintains that. 632b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru rollbackOrigin += totalDelta; 633b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru globalLimit += totalDelta; 634b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 635b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru // Delete the rollback copy 636b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru text.handleReplaceBetween(rollbackOrigin, rollbackOrigin + runLength, EMPTY); 637b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 638b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru // Move start past committed text 639b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru index.start = passStart; 640b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 641b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 642b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru else { 643b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru // Delegate to subclass for actual transliteration. 644b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru int32_t limit = index.limit; 645b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru handleTransliterate(text, index, isIncrementalRun); 646b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru delta = index.limit - limit; // change in length 647b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 648b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru // In a properly written transliterator, start == limit after 649b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru // handleTransliterate() returns when incremental is false. 650b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru // Catch cases where the subclass doesn't do this, and throw 651b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru // an exception. (Just pinning start to limit is a bad idea, 652b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru // because what's probably happening is that the subclass 653b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru // isn't transliterating all the way to the end, and it should 654b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru // in non-incremental mode.) 655b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru if (!incremental && index.start != index.limit) { 656b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru // We can't throw an exception, so just fudge things 657b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru index.start = index.limit; 658b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 659b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 660b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru // Adjust overall limit for insertions/deletions. Don't need 661b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru // to worry about contextLimit because handleTransliterate() 662b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru // maintains that. 663b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru globalLimit += delta; 664b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 665b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 666b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru if (filter == NULL || isIncrementalRun) { 667b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru break; 668b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 669b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 670b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru // If we did completely transliterate this 671b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru // run, then repeat with the next unfiltered run. 672b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 673b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 674b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru // Start is valid where it is. Limit needs to be put back where 675b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru // it was, modulo adjustments for deletions/insertions. 676b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru index.limit = globalLimit; 677b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru} 678b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 679b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queruvoid Transliterator::filteredTransliterate(Replaceable& text, 680b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru UTransPosition& index, 681b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru UBool incremental) const { 682b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru filteredTransliterate(text, index, incremental, FALSE); 683b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru} 684b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 685b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru/** 686b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * Method for subclasses to use to set the maximum context length. 687b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * @see #getMaximumContextLength 688b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru */ 689b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queruvoid Transliterator::setMaximumContextLength(int32_t maxContextLength) { 690b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru maximumContextLength = maxContextLength; 691b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru} 692b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 693b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru/** 694b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * Returns a programmatic identifier for this transliterator. 695b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * If this identifier is passed to <code>getInstance()</code>, it 696b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * will return this object, if it has been registered. 697b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * @see #registerInstance 698b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * @see #getAvailableIDs 699b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru */ 700b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queruconst UnicodeString& Transliterator::getID(void) const { 701b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru return ID; 702b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru} 703b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 704b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru/** 705b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * Returns a name for this transliterator that is appropriate for 706b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * display to the user in the default locale. See {@link 707b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * #getDisplayName(Locale)} for details. 708b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru */ 709b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste QueruUnicodeString& U_EXPORT2 Transliterator::getDisplayName(const UnicodeString& ID, 710b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru UnicodeString& result) { 711b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru return getDisplayName(ID, Locale::getDefault(), result); 712b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru} 713b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 714b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru/** 715b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * Returns a name for this transliterator that is appropriate for 716b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * display to the user in the given locale. This name is taken 717b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * from the locale resource data in the standard manner of the 718b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * <code>java.text</code> package. 719b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * 720b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * <p>If no localized names exist in the system resource bundles, 721b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * a name is synthesized using a localized 722b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * <code>MessageFormat</code> pattern from the resource data. The 723b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * arguments to this pattern are an integer followed by one or two 724b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * strings. The integer is the number of strings, either 1 or 2. 725b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * The strings are formed by splitting the ID for this 726b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * transliterator at the first TARGET_SEP. If there is no TARGET_SEP, then the 727b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * entire ID forms the only string. 728b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * @param inLocale the Locale in which the display name should be 729b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * localized. 730b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * @see java.text.MessageFormat 731b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru */ 732b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste QueruUnicodeString& U_EXPORT2 Transliterator::getDisplayName(const UnicodeString& id, 733b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru const Locale& inLocale, 734b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru UnicodeString& result) { 735b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru UErrorCode status = U_ZERO_ERROR; 736b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 737b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru ResourceBundle bundle(U_ICUDATA_TRANSLIT, inLocale, status); 738b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 739b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru // Suspend checking status until later... 740b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 741b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru result.truncate(0); 742b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 743b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru // Normalize the ID 744b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru UnicodeString source, target, variant; 745b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru UBool sawSource; 746b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru TransliteratorIDParser::IDtoSTV(id, source, target, variant, sawSource); 747b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru if (target.length() < 1) { 748b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru // No target; malformed id 749b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru return result; 750b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 751b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru if (variant.length() > 0) { // Change "Foo" to "/Foo" 752b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru variant.insert(0, VARIANT_SEP); 753b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 754b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru UnicodeString ID(source); 755b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru ID.append(TARGET_SEP).append(target).append(variant); 756b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 757b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru // build the char* key 758b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru if (uprv_isInvariantUString(ID.getBuffer(), ID.length())) { 759b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru char key[200]; 760b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru uprv_strcpy(key, RB_DISPLAY_NAME_PREFIX); 761b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru int32_t length=(int32_t)uprv_strlen(RB_DISPLAY_NAME_PREFIX); 762b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru ID.extract(0, (int32_t)(sizeof(key)-length), key+length, (int32_t)(sizeof(key)-length), US_INV); 763b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 764b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru // Try to retrieve a UnicodeString from the bundle. 765b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru UnicodeString resString = bundle.getStringEx(key, status); 766b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 767b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru if (U_SUCCESS(status) && resString.length() != 0) { 768b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru return result = resString; // [sic] assign & return 769b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 770b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 771b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru#if !UCONFIG_NO_FORMATTING 772b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru // We have failed to get a name from the locale data. This is 773b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru // typical, since most transliterators will not have localized 774b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru // name data. The next step is to retrieve the MessageFormat 775b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru // pattern from the locale data and to use it to synthesize the 776b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru // name from the ID. 777b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 778b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru status = U_ZERO_ERROR; 779b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru resString = bundle.getStringEx(RB_DISPLAY_NAME_PATTERN, status); 780b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 781b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru if (U_SUCCESS(status) && resString.length() != 0) { 782b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru MessageFormat msg(resString, inLocale, status); 783b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru // Suspend checking status until later... 784b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 785b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru // We pass either 2 or 3 Formattable objects to msg. 786b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru Formattable args[3]; 787b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru int32_t nargs; 788b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru args[0].setLong(2); // # of args to follow 789b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru args[1].setString(source); 790b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru args[2].setString(target); 791b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru nargs = 3; 792b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 793b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru // Use display names for the scripts, if they exist 794b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru UnicodeString s; 795b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru length=(int32_t)uprv_strlen(RB_SCRIPT_DISPLAY_NAME_PREFIX); 796b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru for (int j=1; j<=2; ++j) { 797b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru status = U_ZERO_ERROR; 798b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru uprv_strcpy(key, RB_SCRIPT_DISPLAY_NAME_PREFIX); 799b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru args[j].getString(s); 800b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru if (uprv_isInvariantUString(s.getBuffer(), s.length())) { 801b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru s.extract(0, sizeof(key)-length-1, key+length, (int32_t)sizeof(key)-length-1, US_INV); 802b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 803b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru resString = bundle.getStringEx(key, status); 804b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 805b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru if (U_SUCCESS(status)) { 806b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru args[j] = resString; 807b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 808b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 809b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 810b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 811b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru status = U_ZERO_ERROR; 812b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru FieldPosition pos; // ignored by msg 813b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru msg.format(args, nargs, result, pos, status); 814b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru if (U_SUCCESS(status)) { 815b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru result.append(variant); 816b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru return result; 817b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 818b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 819b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru#endif 820b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 821b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 822b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru // We should not reach this point unless there is something 823b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru // wrong with the build or the RB_DISPLAY_NAME_PATTERN has 824b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru // been deleted from the root RB_LOCALE_ELEMENTS resource. 825b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru result = ID; 826b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru return result; 827b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru} 828b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 829b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru/** 830b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * Returns the filter used by this transliterator, or <tt>null</tt> 831b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * if this transliterator uses no filter. Caller musn't delete 832b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * the result! 833b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru */ 834b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queruconst UnicodeFilter* Transliterator::getFilter(void) const { 835b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru return filter; 836b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru} 837b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 838b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru/** 839b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * Returns the filter used by this transliterator, or 840b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * <tt>NULL</tt> if this transliterator uses no filter. The 841b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * caller must eventually delete the result. After this call, 842b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * this transliterator's filter is set to <tt>NULL</tt>. 843b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru */ 844b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste QueruUnicodeFilter* Transliterator::orphanFilter(void) { 845b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru UnicodeFilter *result = filter; 846b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru filter = NULL; 847b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru return result; 848b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru} 849b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 850b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru/** 851b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * Changes the filter used by this transliterator. If the filter 852b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * is set to <tt>null</tt> then no filtering will occur. 853b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * 854b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * <p>Callers must take care if a transliterator is in use by 855b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * multiple threads. The filter should not be changed by one 856b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * thread while another thread may be transliterating. 857b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru */ 858b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queruvoid Transliterator::adoptFilter(UnicodeFilter* filterToAdopt) { 859b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru delete filter; 860b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru filter = filterToAdopt; 861b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru} 862b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 863b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru/** 864b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * Returns this transliterator's inverse. See the class 865b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * documentation for details. This implementation simply inverts 866b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * the two entities in the ID and attempts to retrieve the 867b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * resulting transliterator. That is, if <code>getID()</code> 868b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * returns "A-B", then this method will return the result of 869b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * <code>getInstance("B-A")</code>, or <code>null</code> if that 870b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * call fails. 871b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * 872b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * <p>This method does not take filtering into account. The 873b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * returned transliterator will have no filter. 874b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * 875b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * <p>Subclasses with knowledge of their inverse may wish to 876b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * override this method. 877b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * 878b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * @return a transliterator that is an inverse, not necessarily 879b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * exact, of this transliterator, or <code>null</code> if no such 880b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * transliterator is registered. 881b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * @see #registerInstance 882b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru */ 883b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste QueruTransliterator* Transliterator::createInverse(UErrorCode& status) const { 884b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru UParseError parseError; 885b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru return Transliterator::createInstance(ID, UTRANS_REVERSE,parseError,status); 886b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru} 887b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 888b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste QueruTransliterator* U_EXPORT2 889b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste QueruTransliterator::createInstance(const UnicodeString& ID, 890b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru UTransDirection dir, 891b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru UErrorCode& status) 892b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru{ 893b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru UParseError parseError; 894b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru return createInstance(ID, dir, parseError, status); 895b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru} 896b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 897b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru/** 898b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * Returns a <code>Transliterator</code> object given its ID. 899b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * The ID must be either a system transliterator ID or a ID registered 900b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * using <code>registerInstance()</code>. 901b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * 902b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * @param ID a valid ID, as enumerated by <code>getAvailableIDs()</code> 903b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * @return A <code>Transliterator</code> object with the given ID 904b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * @see #registerInstance 905b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * @see #getAvailableIDs 906b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * @see #getID 907b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru */ 908b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste QueruTransliterator* U_EXPORT2 909b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste QueruTransliterator::createInstance(const UnicodeString& ID, 910b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru UTransDirection dir, 911b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru UParseError& parseError, 912b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru UErrorCode& status) 913b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru{ 914b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru if (U_FAILURE(status)) { 915b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru return 0; 916b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 917b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 918b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru UnicodeString canonID; 919b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru UVector list(status); 920b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru if (U_FAILURE(status)) { 921b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru return NULL; 922b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 923b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 924b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru UnicodeSet* globalFilter; 925b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru // TODO add code for parseError...currently unused, but 926b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru // later may be used by parsing code... 927b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru if (!TransliteratorIDParser::parseCompoundID(ID, dir, canonID, list, globalFilter)) { 928b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru status = U_INVALID_ID; 929b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru return NULL; 930b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 931b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 932b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru TransliteratorIDParser::instantiateList(list, status); 933b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru if (U_FAILURE(status)) { 934b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru return NULL; 935b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 936b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 937b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru U_ASSERT(list.size() > 0); 938b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru Transliterator* t = NULL; 939b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 940b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru if (list.size() > 1 || canonID.indexOf(ID_DELIM) >= 0) { 941b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru // [NOTE: If it's a compoundID, we instantiate a CompoundTransliterator even if it only 942b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru // has one child transliterator. This is so that toRules() will return the right thing 943b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru // (without any inactive ID), but our main ID still comes out correct. That is, if we 944b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru // instantiate "(Lower);Latin-Greek;", we want the rules to come out as "::Latin-Greek;" 945b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru // even though the ID is "(Lower);Latin-Greek;". 946b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru t = new CompoundTransliterator(list, parseError, status); 947b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 948b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru else { 949b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru t = (Transliterator*)list.elementAt(0); 950b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 951b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 952b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru t->setID(canonID); 953b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru if (globalFilter != NULL) { 954b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru t->adoptFilter(globalFilter); 955b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 956b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru return t; 957b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru} 958b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 959b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru/** 960b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * Create a transliterator from a basic ID. This is an ID 961b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * containing only the forward direction source, target, and 962b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * variant. 963b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * @param id a basic ID of the form S-T or S-T/V. 964b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * @return a newly created Transliterator or null if the ID is 965b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * invalid. 966b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru */ 967b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste QueruTransliterator* Transliterator::createBasicInstance(const UnicodeString& id, 968b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru const UnicodeString* canon) { 969b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru UParseError pe; 970b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru UErrorCode ec = U_ZERO_ERROR; 971b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru TransliteratorAlias* alias = 0; 972b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru Transliterator* t = 0; 973b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 974b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru umtx_init(®istryMutex); 975b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru umtx_lock(®istryMutex); 976b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru if (HAVE_REGISTRY) { 977b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru t = registry->get(id, alias, ec); 978b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 979b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru umtx_unlock(®istryMutex); 980b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 981b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru if (U_FAILURE(ec)) { 982b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru delete t; 983b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru delete alias; 984b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru return 0; 985b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 986b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 987b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru // We may have not gotten a transliterator: Because we can't 988b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru // instantiate a transliterator from inside TransliteratorRegistry:: 989b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru // get() (that would deadlock), we sometimes pass back an alias. This 990b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru // contains the data we need to finish the instantiation outside the 991b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru // registry mutex. The alias may, in turn, generate another alias, so 992b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru // we handle aliases in a loop. The max times through the loop is two. 993b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru // [alan] 994b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru while (alias != 0) { 995b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru U_ASSERT(t==0); 996b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru // Rule-based aliases are handled with TransliteratorAlias:: 997b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru // parse(), followed by TransliteratorRegistry::reget(). 998b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru // Other aliases are handled with TransliteratorAlias::create(). 999b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru if (alias->isRuleBased()) { 1000b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru // Step 1. parse 1001b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru TransliteratorParser parser(ec); 1002b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru alias->parse(parser, pe, ec); 1003b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru delete alias; 1004b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru alias = 0; 1005b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 1006b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru // Step 2. reget 1007b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru umtx_lock(®istryMutex); 1008b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru if (HAVE_REGISTRY) { 1009b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru t = registry->reget(id, parser, alias, ec); 1010b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 1011b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru umtx_unlock(®istryMutex); 1012b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 1013b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru // Step 3. Loop back around! 1014b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } else { 1015b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru t = alias->create(pe, ec); 1016b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru delete alias; 1017b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru alias = 0; 1018b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru break; 1019b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 1020b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru if (U_FAILURE(ec)) { 1021b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru delete t; 1022b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru delete alias; 1023b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru t = NULL; 1024b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru break; 1025b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 1026b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 1027b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 1028b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru if (t != NULL && canon != NULL) { 1029b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru t->setID(*canon); 1030b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 1031b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 1032b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru return t; 1033b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru} 1034b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 1035b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru/** 1036b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * Returns a <code>Transliterator</code> object constructed from 1037b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * the given rule string. This will be a RuleBasedTransliterator, 1038b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * if the rule string contains only rules, or a 1039b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * CompoundTransliterator, if it contains ID blocks, or a 1040b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * NullTransliterator, if it contains ID blocks which parse as 1041b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * empty for the given direction. 1042b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru */ 1043b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste QueruTransliterator* U_EXPORT2 1044b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste QueruTransliterator::createFromRules(const UnicodeString& ID, 1045b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru const UnicodeString& rules, 1046b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru UTransDirection dir, 1047b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru UParseError& parseError, 1048b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru UErrorCode& status) 1049b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru{ 1050b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru Transliterator* t = NULL; 1051b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 1052b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru TransliteratorParser parser(status); 1053b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru parser.parse(rules, dir, parseError, status); 1054b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 1055b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru if (U_FAILURE(status)) { 1056b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru return 0; 1057b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 1058b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 1059b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru // NOTE: The logic here matches that in TransliteratorRegistry. 1060b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru if (parser.idBlockVector.size() == 0 && parser.dataVector.size() == 0) { 1061b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru t = new NullTransliterator(); 1062b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 1063b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru else if (parser.idBlockVector.size() == 0 && parser.dataVector.size() == 1) { 1064b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru t = new RuleBasedTransliterator(ID, (TransliterationRuleData*)parser.dataVector.orphanElementAt(0), TRUE); 1065b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 1066b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru else if (parser.idBlockVector.size() == 1 && parser.dataVector.size() == 0) { 1067b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru // idBlock, no data -- this is an alias. The ID has 1068b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru // been munged from reverse into forward mode, if 1069b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru // necessary, so instantiate the ID in the forward 1070b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru // direction. 1071b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru if (parser.compoundFilter != NULL) { 1072b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru UnicodeString filterPattern; 1073b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru parser.compoundFilter->toPattern(filterPattern, FALSE); 1074b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru t = createInstance(filterPattern + UnicodeString(ID_DELIM) 1075b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru + *((UnicodeString*)parser.idBlockVector.elementAt(0)), UTRANS_FORWARD, parseError, status); 1076b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 1077b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru else 1078b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru t = createInstance(*((UnicodeString*)parser.idBlockVector.elementAt(0)), UTRANS_FORWARD, parseError, status); 1079b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 1080b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 1081b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru if (t != NULL) { 1082b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru t->setID(ID); 1083b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 1084b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 1085b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru else { 1086b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru UVector transliterators(status); 1087b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru int32_t passNumber = 1; 1088b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 1089b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru int32_t limit = parser.idBlockVector.size(); 1090b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru if (parser.dataVector.size() > limit) 1091b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru limit = parser.dataVector.size(); 1092b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 1093b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru for (int32_t i = 0; i < limit; i++) { 1094b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru if (i < parser.idBlockVector.size()) { 1095b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru UnicodeString* idBlock = (UnicodeString*)parser.idBlockVector.elementAt(i); 1096b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru if (!idBlock->isEmpty()) { 1097b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru Transliterator* temp = createInstance(*idBlock, UTRANS_FORWARD, parseError, status); 1098b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru if (temp != NULL && temp->getDynamicClassID() != NullTransliterator::getStaticClassID()) 1099b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru transliterators.addElement(temp, status); 1100b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru else 1101b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru delete temp; 1102b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 1103b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 1104b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru if (!parser.dataVector.isEmpty()) { 1105b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru TransliterationRuleData* data = (TransliterationRuleData*)parser.dataVector.orphanElementAt(0); 1106b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru transliterators.addElement( 1107b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru new RuleBasedTransliterator(UnicodeString(CompoundTransliterator::PASS_STRING) + (passNumber++), 1108b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru data, TRUE), status); 1109b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 1110b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 1111b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 1112b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru t = new CompoundTransliterator(transliterators, passNumber - 1, parseError, status); 1113b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru t->setID(ID); 1114b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru t->adoptFilter(parser.orphanCompoundFilter()); 1115b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 1116b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru return t; 1117b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru} 1118b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 1119b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste QueruUnicodeString& Transliterator::toRules(UnicodeString& rulesSource, 1120b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru UBool escapeUnprintable) const { 1121b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru // The base class implementation of toRules munges the ID into 1122b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru // the correct format. That is: foo => ::foo 1123b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru if (escapeUnprintable) { 1124b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru rulesSource.truncate(0); 1125b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru UnicodeString id = getID(); 1126b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru for (int32_t i=0; i<id.length();) { 1127b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru UChar32 c = id.char32At(i); 1128b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru if (!ICU_Utility::escapeUnprintable(rulesSource, c)) { 1129b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru rulesSource.append(c); 1130b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 1131b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru i += UTF_CHAR_LENGTH(c); 1132b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 1133b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } else { 1134b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru rulesSource = getID(); 1135b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 1136b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru // KEEP in sync with rbt_pars 1137b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru rulesSource.insert(0, UNICODE_STRING_SIMPLE("::")); 1138b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru rulesSource.append(ID_DELIM); 1139b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru return rulesSource; 1140b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru} 1141b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 1142b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queruint32_t Transliterator::countElements() const { 1143b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru return (this->getDynamicClassID() == 1144b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru CompoundTransliterator::getStaticClassID()) ? 1145b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru ((const CompoundTransliterator*) this)->getCount() : 0; 1146b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru} 1147b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 1148b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queruconst Transliterator& Transliterator::getElement(int32_t index, UErrorCode& ec) const { 1149b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru if (U_FAILURE(ec)) { 1150b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru return *this; 1151b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 1152b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru const CompoundTransliterator* cpd = 1153b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru (this->getDynamicClassID() == CompoundTransliterator::getStaticClassID()) ? 1154b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru (const CompoundTransliterator*) this : 0; 1155b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru int32_t n = (cpd == NULL) ? 1 : cpd->getCount(); 1156b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru if (index < 0 || index >= n) { 1157b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru ec = U_INDEX_OUTOFBOUNDS_ERROR; 1158b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru return *this; 1159b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } else { 1160b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru return (n == 1) ? *this : cpd->getTransliterator(index); 1161b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 1162b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru} 1163b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 1164b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste QueruUnicodeSet& Transliterator::getSourceSet(UnicodeSet& result) const { 1165b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru handleGetSourceSet(result); 1166b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru if (filter != NULL) { 1167b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru UnicodeSet* filterSet; 1168b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru UBool deleteFilterSet = FALSE; 1169b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru // Most, but not all filters will be UnicodeSets. Optimize for 1170b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru // the high-runner case. 1171b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru if (filter->getDynamicClassID() == UnicodeSet::getStaticClassID()) { 1172b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru filterSet = (UnicodeSet*) filter; 1173b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } else { 1174b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru filterSet = new UnicodeSet(); 1175b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru deleteFilterSet = TRUE; 1176b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru filter->addMatchSetTo(*filterSet); 1177b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 1178b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru result.retainAll(*filterSet); 1179b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru if (deleteFilterSet) { 1180b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru delete filterSet; 1181b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 1182b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 1183b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru return result; 1184b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru} 1185b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 1186b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queruvoid Transliterator::handleGetSourceSet(UnicodeSet& result) const { 1187b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru result.clear(); 1188b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru} 1189b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 1190b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste QueruUnicodeSet& Transliterator::getTargetSet(UnicodeSet& result) const { 1191b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru return result.clear(); 1192b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru} 1193b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 1194b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru// For public consumption 1195b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queruvoid U_EXPORT2 Transliterator::registerFactory(const UnicodeString& id, 1196b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru Transliterator::Factory factory, 1197b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru Transliterator::Token context) { 1198b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru umtx_init(®istryMutex); 1199b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru Mutex lock(®istryMutex); 1200b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru if (HAVE_REGISTRY) { 1201b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru _registerFactory(id, factory, context); 1202b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 1203b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru} 1204b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 1205b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru// To be called only by Transliterator subclasses that are called 1206b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru// to register themselves by initializeRegistry(). 1207b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queruvoid Transliterator::_registerFactory(const UnicodeString& id, 1208b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru Transliterator::Factory factory, 1209b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru Transliterator::Token context) { 1210b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru registry->put(id, factory, context, TRUE); 1211b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru} 1212b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 1213b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru// To be called only by Transliterator subclasses that are called 1214b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru// to register themselves by initializeRegistry(). 1215b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queruvoid Transliterator::_registerSpecialInverse(const UnicodeString& target, 1216b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru const UnicodeString& inverseTarget, 1217b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru UBool bidirectional) { 1218b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru UErrorCode status = U_ZERO_ERROR; 1219b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru TransliteratorIDParser::registerSpecialInverse(target, inverseTarget, bidirectional, status); 1220b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru} 1221b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 1222b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru/** 1223b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * Registers a instance <tt>obj</tt> of a subclass of 1224b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * <code>Transliterator</code> with the system. This object must 1225b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * implement the <tt>clone()</tt> method. When 1226b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * <tt>getInstance()</tt> is called with an ID string that is 1227b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * equal to <tt>obj.getID()</tt>, then <tt>obj.clone()</tt> is 1228b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * returned. 1229b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * 1230b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * @param obj an instance of subclass of 1231b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * <code>Transliterator</code> that defines <tt>clone()</tt> 1232b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * @see #getInstance 1233b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * @see #unregister 1234b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru */ 1235b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queruvoid U_EXPORT2 Transliterator::registerInstance(Transliterator* adoptedPrototype) { 1236b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru umtx_init(®istryMutex); 1237b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru Mutex lock(®istryMutex); 1238b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru if (HAVE_REGISTRY) { 1239b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru _registerInstance(adoptedPrototype); 1240b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 1241b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru} 1242b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 1243b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queruvoid Transliterator::_registerInstance(Transliterator* adoptedPrototype) { 1244b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru registry->put(adoptedPrototype, TRUE); 1245b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru} 1246b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 1247b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queruvoid U_EXPORT2 Transliterator::registerAlias(const UnicodeString& aliasID, 1248b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru const UnicodeString& realID) { 1249b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru umtx_init(®istryMutex); 1250b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru Mutex lock(®istryMutex); 1251b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru if (HAVE_REGISTRY) { 1252b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru _registerAlias(aliasID, realID); 1253b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 1254b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru} 1255b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 1256b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queruvoid Transliterator::_registerAlias(const UnicodeString& aliasID, 1257b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru const UnicodeString& realID) { 1258b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru registry->put(aliasID, realID, FALSE, TRUE); 1259b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru} 1260b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 1261b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru/** 1262b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * Unregisters a transliterator or class. This may be either 1263b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * a system transliterator or a user transliterator or class. 1264b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * 1265b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * @param ID the ID of the transliterator or class 1266b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * @see #registerInstance 1267b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 1268b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru */ 1269b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queruvoid U_EXPORT2 Transliterator::unregister(const UnicodeString& ID) { 1270b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru umtx_init(®istryMutex); 1271b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru Mutex lock(®istryMutex); 1272b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru if (HAVE_REGISTRY) { 1273b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru registry->remove(ID); 1274b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 1275b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru} 1276b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 1277b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru/** 1278b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * == OBSOLETE - remove in ICU 3.4 == 1279b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * Return the number of IDs currently registered with the system. 1280b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * To retrieve the actual IDs, call getAvailableID(i) with 1281b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * i from 0 to countAvailableIDs() - 1. 1282b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru */ 1283b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queruint32_t U_EXPORT2 Transliterator::countAvailableIDs(void) { 1284b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru umtx_init(®istryMutex); 1285b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru Mutex lock(®istryMutex); 1286b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru return HAVE_REGISTRY ? registry->countAvailableIDs() : 0; 1287b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru} 1288b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 1289b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru/** 1290b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * == OBSOLETE - remove in ICU 3.4 == 1291b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * Return the index-th available ID. index must be between 0 1292b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * and countAvailableIDs() - 1, inclusive. If index is out of 1293b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * range, the result of getAvailableID(0) is returned. 1294b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru */ 1295b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queruconst UnicodeString& U_EXPORT2 Transliterator::getAvailableID(int32_t index) { 1296b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru const UnicodeString* result = NULL; 1297b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru umtx_init(®istryMutex); 1298b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru umtx_lock(®istryMutex); 1299b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru if (HAVE_REGISTRY) { 1300b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru result = ®istry->getAvailableID(index); 1301b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 1302b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru umtx_unlock(®istryMutex); 1303b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru U_ASSERT(result != NULL); // fail if no registry 1304b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru return *result; 1305b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru} 1306b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 1307b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste QueruStringEnumeration* U_EXPORT2 Transliterator::getAvailableIDs(UErrorCode& ec) { 1308b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru if (U_FAILURE(ec)) return NULL; 1309b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru StringEnumeration* result = NULL; 1310b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru umtx_init(®istryMutex); 1311b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru umtx_lock(®istryMutex); 1312b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru if (HAVE_REGISTRY) { 1313b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru result = registry->getAvailableIDs(); 1314b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 1315b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru umtx_unlock(®istryMutex); 1316b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru if (result == NULL) { 1317b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru ec = U_INTERNAL_TRANSLITERATOR_ERROR; 1318b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 1319b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru return result; 1320b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru} 1321b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 1322b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queruint32_t U_EXPORT2 Transliterator::countAvailableSources(void) { 1323b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru umtx_init(®istryMutex); 1324b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru Mutex lock(®istryMutex); 1325b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru return HAVE_REGISTRY ? _countAvailableSources() : 0; 1326b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru} 1327b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 1328b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste QueruUnicodeString& U_EXPORT2 Transliterator::getAvailableSource(int32_t index, 1329b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru UnicodeString& result) { 1330b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru umtx_init(®istryMutex); 1331b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru Mutex lock(®istryMutex); 1332b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru if (HAVE_REGISTRY) { 1333b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru _getAvailableSource(index, result); 1334b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 1335b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru return result; 1336b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru} 1337b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 1338b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queruint32_t U_EXPORT2 Transliterator::countAvailableTargets(const UnicodeString& source) { 1339b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru umtx_init(®istryMutex); 1340b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru Mutex lock(®istryMutex); 1341b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru return HAVE_REGISTRY ? _countAvailableTargets(source) : 0; 1342b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru} 1343b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 1344b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste QueruUnicodeString& U_EXPORT2 Transliterator::getAvailableTarget(int32_t index, 1345b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru const UnicodeString& source, 1346b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru UnicodeString& result) { 1347b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru umtx_init(®istryMutex); 1348b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru Mutex lock(®istryMutex); 1349b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru if (HAVE_REGISTRY) { 1350b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru _getAvailableTarget(index, source, result); 1351b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 1352b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru return result; 1353b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru} 1354b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 1355b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queruint32_t U_EXPORT2 Transliterator::countAvailableVariants(const UnicodeString& source, 1356b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru const UnicodeString& target) { 1357b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru umtx_init(®istryMutex); 1358b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru Mutex lock(®istryMutex); 1359b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru return HAVE_REGISTRY ? _countAvailableVariants(source, target) : 0; 1360b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru} 1361b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 1362b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste QueruUnicodeString& U_EXPORT2 Transliterator::getAvailableVariant(int32_t index, 1363b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru const UnicodeString& source, 1364b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru const UnicodeString& target, 1365b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru UnicodeString& result) { 1366b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru umtx_init(®istryMutex); 1367b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru Mutex lock(®istryMutex); 1368b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru if (HAVE_REGISTRY) { 1369b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru _getAvailableVariant(index, source, target, result); 1370b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 1371b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru return result; 1372b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru} 1373b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 1374b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queruint32_t Transliterator::_countAvailableSources(void) { 1375b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru return registry->countAvailableSources(); 1376b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru} 1377b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 1378b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste QueruUnicodeString& Transliterator::_getAvailableSource(int32_t index, 1379b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru UnicodeString& result) { 1380b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru return registry->getAvailableSource(index, result); 1381b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru} 1382b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 1383b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queruint32_t Transliterator::_countAvailableTargets(const UnicodeString& source) { 1384b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru return registry->countAvailableTargets(source); 1385b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru} 1386b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 1387b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste QueruUnicodeString& Transliterator::_getAvailableTarget(int32_t index, 1388b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru const UnicodeString& source, 1389b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru UnicodeString& result) { 1390b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru return registry->getAvailableTarget(index, source, result); 1391b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru} 1392b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 1393b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queruint32_t Transliterator::_countAvailableVariants(const UnicodeString& source, 1394b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru const UnicodeString& target) { 1395b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru return registry->countAvailableVariants(source, target); 1396b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru} 1397b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 1398b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste QueruUnicodeString& Transliterator::_getAvailableVariant(int32_t index, 1399b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru const UnicodeString& source, 1400b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru const UnicodeString& target, 1401b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru UnicodeString& result) { 1402b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru return registry->getAvailableVariant(index, source, target, result); 1403b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru} 1404b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 1405b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru#ifdef U_USE_DEPRECATED_TRANSLITERATOR_API 1406b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 1407b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru/** 1408b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * Method for subclasses to use to obtain a character in the given 1409b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * string, with filtering. 1410b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * @deprecated the new architecture provides filtering at the top 1411b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * level. This method will be removed Dec 31 2001. 1412b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru */ 1413b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste QueruUChar Transliterator::filteredCharAt(const Replaceable& text, int32_t i) const { 1414b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru UChar c; 1415b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru const UnicodeFilter* localFilter = getFilter(); 1416b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru return (localFilter == 0) ? text.charAt(i) : 1417b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru (localFilter->contains(c = text.charAt(i)) ? c : (UChar)0xFFFE); 1418b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru} 1419b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 1420b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru#endif 1421b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 1422b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru/** 1423b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * If the registry is initialized, return TRUE. If not, initialize it 1424b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * and return TRUE. If the registry cannot be initialized, return 1425b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * FALSE (rare). 1426b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * 1427b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * IMPORTANT: Upon entry, registryMutex must be LOCKED. The entirely 1428b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * initialization is done with the lock held. There is NO REASON to 1429b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * unlock, since no other thread that is waiting on the registryMutex 1430b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * cannot itself proceed until the registry is initialized. 1431b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru */ 1432b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste QueruUBool Transliterator::initializeRegistry() { 1433b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru if (registry != 0) { 1434b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru return TRUE; 1435b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 1436b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 1437b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru UErrorCode status = U_ZERO_ERROR; 1438b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 1439b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru registry = new TransliteratorRegistry(status); 1440b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru if (registry == 0 || U_FAILURE(status)) { 1441b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru delete registry; 1442b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru registry = 0; 1443b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru return FALSE; // can't create registry, no recovery 1444b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 1445b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 1446b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru /* The following code parses the index table located in 1447b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * icu/data/translit/root.txt. The index is an n x 4 table 1448b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * that follows this format: 1449b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * <id>{ 1450b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * file{ 1451b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * resource{"<resource>"} 1452b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * direction{"<direction>"} 1453b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * } 1454b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * } 1455b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * <id>{ 1456b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * internal{ 1457b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * resource{"<resource>"} 1458b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * direction{"<direction"} 1459b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * } 1460b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * } 1461b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * <id>{ 1462b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * alias{"<getInstanceArg"} 1463b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * } 1464b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * <id> is the ID of the system transliterator being defined. These 1465b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * are public IDs enumerated by Transliterator.getAvailableIDs(), 1466b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * unless the second field is "internal". 1467b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * 1468b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * <resource> is a ResourceReader resource name. Currently these refer 1469b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * to file names under com/ibm/text/resources. This string is passed 1470b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * directly to ResourceReader, together with <encoding>. 1471b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * 1472b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * <direction> is either "FORWARD" or "REVERSE". 1473b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * 1474b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * <getInstanceArg> is a string to be passed directly to 1475b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * Transliterator.getInstance(). The returned Transliterator object 1476b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * then has its ID changed to <id> and is returned. 1477b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * 1478b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * The extra blank field on "alias" lines is to make the array square. 1479b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru */ 1480b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru //static const char translit_index[] = "translit_index"; 1481b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 1482b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru UResourceBundle *bundle, *transIDs, *colBund; 1483b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru bundle = ures_open(U_ICUDATA_TRANSLIT, NULL/*open default locale*/, &status); 1484b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru transIDs = ures_getByKey(bundle, RB_RULE_BASED_IDS, 0, &status); 1485b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 1486b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru int32_t row, maxRows; 1487b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru if (U_SUCCESS(status)) { 1488b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru maxRows = ures_getSize(transIDs); 1489b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru for (row = 0; row < maxRows; row++) { 1490b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru colBund = ures_getByIndex(transIDs, row, 0, &status); 1491b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru if (U_SUCCESS(status)) { 1492b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru UnicodeString id(ures_getKey(colBund), -1, US_INV); 1493b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru UResourceBundle* res = ures_getNextResource(colBund, NULL, &status); 1494b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru const char* typeStr = ures_getKey(res); 1495b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru UChar type; 1496b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru u_charsToUChars(typeStr, &type, 1); 1497b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 1498b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru if (U_SUCCESS(status)) { 1499b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru int32_t len = 0; 1500b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru const UChar *resString; 1501b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru switch (type) { 1502b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru case 0x66: // 'f' 1503b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru case 0x69: // 'i' 1504b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru // 'file' or 'internal'; 1505b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru // row[2]=resource, row[3]=direction 1506b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru { 1507b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 1508b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru resString = ures_getStringByKey(res, "resource", &len, &status); 1509b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru UBool visible = (type == 0x0066 /*f*/); 1510b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru UTransDirection dir = 1511b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru (ures_getUnicodeStringByKey(res, "direction", &status).charAt(0) == 1512b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 0x0046 /*F*/) ? 1513b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru UTRANS_FORWARD : UTRANS_REVERSE; 1514b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru registry->put(id, UnicodeString(TRUE, resString, len), dir, TRUE, visible); 1515b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 1516b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru break; 1517b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru case 0x61: // 'a' 1518b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru // 'alias'; row[2]=createInstance argument 1519b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru resString = ures_getString(res, &len, &status); 1520b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru registry->put(id, UnicodeString(TRUE, resString, len), TRUE, TRUE); 1521b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru break; 1522b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 1523b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 1524b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru ures_close(res); 1525b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 1526b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru ures_close(colBund); 1527b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 1528b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 1529b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 1530b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru ures_close(transIDs); 1531b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru ures_close(bundle); 1532b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 1533b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru // Manually add prototypes that the system knows about to the 1534b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru // cache. This is how new non-rule-based transliterators are 1535b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru // added to the system. 1536b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 1537b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru registry->put(new NullTransliterator(), TRUE); 1538b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru registry->put(new LowercaseTransliterator(), TRUE); 1539b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru registry->put(new UppercaseTransliterator(), TRUE); 1540b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru registry->put(new TitlecaseTransliterator(), TRUE); 1541b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru registry->put(new UnicodeNameTransliterator(), TRUE); 1542b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru registry->put(new NameUnicodeTransliterator(), TRUE); 1543b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 1544b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru RemoveTransliterator::registerIDs(); // Must be within mutex 1545b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru EscapeTransliterator::registerIDs(); 1546b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru UnescapeTransliterator::registerIDs(); 1547b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru NormalizationTransliterator::registerIDs(); 1548b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru AnyTransliterator::registerIDs(); 1549b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 1550b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru _registerSpecialInverse(UNICODE_STRING_SIMPLE("Null"), 1551b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru UNICODE_STRING_SIMPLE("Null"), FALSE); 1552b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru _registerSpecialInverse(UNICODE_STRING_SIMPLE("Upper"), 1553b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru UNICODE_STRING_SIMPLE("Lower"), TRUE); 1554b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru _registerSpecialInverse(UNICODE_STRING_SIMPLE("Title"), 1555b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru UNICODE_STRING_SIMPLE("Lower"), FALSE); 1556b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 1557b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru ucln_i18n_registerCleanup(UCLN_I18N_TRANSLITERATOR, transliterator_cleanup); 1558b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 1559b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru return TRUE; 1560b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru} 1561b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 1562b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste QueruU_NAMESPACE_END 1563b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 1564b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru// Defined in ucln_in.h: 1565b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 1566b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru/** 1567b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * Release all static memory held by transliterator. This will 1568b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * necessarily invalidate any rule-based transliterators held by the 1569b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * user, because RBTs hold pointers to common data objects. 1570b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru */ 1571b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste QueruU_CFUNC UBool transliterator_cleanup(void) { 1572b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru U_NAMESPACE_USE 1573b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru TransliteratorIDParser::cleanup(); 1574b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru if (registry) { 1575b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru delete registry; 1576b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru registry = NULL; 1577b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 1578b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru umtx_destroy(®istryMutex); 1579b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru return TRUE; 1580b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru} 1581b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 1582b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru#endif /* #if !UCONFIG_NO_TRANSLITERATION */ 1583b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 1584b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru//eof 1585