/*
*****************************************************************
* Copyright (c) 2002-2008, International Business Machines Corporation
* and others.  All Rights Reserved.
*****************************************************************
* Date        Name        Description
* 06/06/2002  aliu        Creation.
*****************************************************************
*/

#include "unicode/utypes.h"

#if !UCONFIG_NO_TRANSLITERATION

#include "unicode/uobject.h"
#include "unicode/uscript.h"
#include "nultrans.h"
#include "anytrans.h"
#include "uvector.h"
#include "tridpars.h"
#include "hash.h"
#include "putilimp.h"
#include "uinvchar.h"
24ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 25ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru//------------------------------------------------------------ 26ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru// Constants 27ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 28ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Querustatic const UChar TARGET_SEP = 45; // '-' 29ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Querustatic const UChar VARIANT_SEP = 47; // '/' 30ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Querustatic const UChar ANY[] = {65,110,121,0}; // "Any" 31ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Querustatic const UChar NULL_ID[] = {78,117,108,108,0}; // "Null" 32ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Querustatic const UChar LATIN_PIVOT[] = {45,76,97,116,105,110,59,76,97,116,105,110,45,0}; // "-Latin;Latin-" 33ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 34ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru//------------------------------------------------------------ 35ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 36ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste QueruU_CDECL_BEGIN 37ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru/** 38ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru * Deleter function for Transliterator*. 
39ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru */ 40ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Querustatic void U_CALLCONV 41ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru_deleteTransliterator(void *obj) { 42ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru delete (U_NAMESPACE_QUALIFIER Transliterator*) obj; 43ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru} 44ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste QueruU_CDECL_END 45ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 46ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru//------------------------------------------------------------ 47ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 48ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste QueruU_NAMESPACE_BEGIN 49ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 50ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru//------------------------------------------------------------ 51ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru// ScriptRunIterator 52ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 53ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru/** 54ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru * Returns a series of ranges corresponding to scripts. They will be 55ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru * of the form: 56ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru * 57ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru * ccccSScSSccccTTcTcccc - c = common, S = first script, T = second 58ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru * | | - first run (start, limit) 59ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru * | | - second run (start, limit) 60ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru * 61ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru * That is, the runs will overlap. 
The reason for this is so that a 62ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru * transliterator can consider common characters both before and after 63ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru * the scripts. 64ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru */ 65ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queruclass ScriptRunIterator : public UMemory { 66ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queruprivate: 67ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru const Replaceable& text; 68ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru int32_t textStart; 69ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru int32_t textLimit; 70ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 71ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Querupublic: 72ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru /** 73ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru * The code of the current run, valid after next() returns. May 74ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru * be USCRIPT_INVALID_CODE if and only if the entire text is 75ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru * COMMON/INHERITED. 76ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru */ 77ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru UScriptCode scriptCode; 78ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 79ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru /** 80ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru * The start of the run, inclusive, valid after next() returns. 
81ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru */ 82ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru int32_t start; 83ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 84ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru /** 85ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru * The end of the run, exclusive, valid after next() returns. 86ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru */ 87ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru int32_t limit; 88ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 89ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru /** 90ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru * Constructs a run iterator over the given text from start 91ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru * (inclusive) to limit (exclusive). 92ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru */ 93ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru ScriptRunIterator(const Replaceable& text, int32_t start, int32_t limit); 94ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 95ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru /** 96ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru * Returns TRUE if there are any more runs. TRUE is always 97ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru * returned at least once. Upon return, the caller should 98ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru * examine scriptCode, start, and limit. 
99ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru */ 100ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru UBool next(); 101ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 102ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru /** 103ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru * Adjusts internal indices for a change in the limit index of the 104ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru * given delta. A positive delta means the limit has increased. 105ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru */ 106ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru void adjustLimit(int32_t delta); 107ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 108ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queruprivate: 109ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru ScriptRunIterator(const ScriptRunIterator &other); // forbid copying of this class 110ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru ScriptRunIterator &operator=(const ScriptRunIterator &other); // forbid copying of this class 111ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru}; 112ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 113ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste QueruScriptRunIterator::ScriptRunIterator(const Replaceable& theText, 114ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru int32_t myStart, int32_t myLimit) : 115ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru text(theText) 116ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru{ 117ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru textStart = myStart; 118ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru textLimit = myLimit; 119ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru limit = myStart; 120ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru} 121ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 
122ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste QueruUBool ScriptRunIterator::next() { 123ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru UChar32 ch; 124ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru UScriptCode s; 125ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru UErrorCode ec = U_ZERO_ERROR; 126ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 127ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru scriptCode = USCRIPT_INVALID_CODE; // don't know script yet 128ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru start = limit; 129ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 130ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru // Are we done? 131ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru if (start == textLimit) { 132ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru return FALSE; 133ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru } 134ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 135ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru // Move start back to include adjacent COMMON or INHERITED 136ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru // characters 137ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru while (start > textStart) { 138ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru ch = text.char32At(start - 1); // look back 139ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru s = uscript_getScript(ch, &ec); 140ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru if (s == USCRIPT_COMMON || s == USCRIPT_INHERITED) { 141ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru --start; 142ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru } else { 143ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru break; 144ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru } 145ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru } 
146ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 147ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru // Move limit ahead to include COMMON, INHERITED, and characters 148ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru // of the current script. 149ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru while (limit < textLimit) { 150ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru ch = text.char32At(limit); // look ahead 151ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru s = uscript_getScript(ch, &ec); 152ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru if (s != USCRIPT_COMMON && s != USCRIPT_INHERITED) { 153ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru if (scriptCode == USCRIPT_INVALID_CODE) { 154ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru scriptCode = s; 155ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru } else if (s != scriptCode) { 156ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru break; 157ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru } 158ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru } 159ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru ++limit; 160ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru } 161ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 162ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru // Return TRUE even if the entire text is COMMON / INHERITED, in 163ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru // which case scriptCode will be USCRIPT_INVALID_CODE. 
164ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru return TRUE; 165ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru} 166ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 167ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queruvoid ScriptRunIterator::adjustLimit(int32_t delta) { 168ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru limit += delta; 169ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru textLimit += delta; 170ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru} 171ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 172ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru//------------------------------------------------------------ 173ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru// AnyTransliterator 174ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 175ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste QueruUOBJECT_DEFINE_RTTI_IMPLEMENTATION(AnyTransliterator) 176ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 177ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste QueruAnyTransliterator::AnyTransliterator(const UnicodeString& id, 178ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru const UnicodeString& theTarget, 179ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru const UnicodeString& theVariant, 180ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru UScriptCode theTargetScript, 181ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru UErrorCode& ec) : 182ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru Transliterator(id, NULL), 183ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru targetScript(theTargetScript) 184ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru{ 185ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru cache = uhash_open(uhash_hashLong, uhash_compareLong, NULL, &ec); 18685bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho if (U_FAILURE(ec)) { 
18785bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho return; 18885bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho } 189ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru uhash_setValueDeleter(cache, _deleteTransliterator); 190ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 191ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru target = theTarget; 192ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru if (theVariant.length() > 0) { 193ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru target.append(VARIANT_SEP).append(theVariant); 194ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru } 195ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru} 196ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 197ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste QueruAnyTransliterator::~AnyTransliterator() { 198ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru uhash_close(cache); 199ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru} 200ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 201ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru/** 202ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru * Copy constructor. 
203ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru */ 204ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste QueruAnyTransliterator::AnyTransliterator(const AnyTransliterator& o) : 205ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru Transliterator(o), 206ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru target(o.target), 207ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru targetScript(o.targetScript) 208ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru{ 209ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru // Don't copy the cache contents 210ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru UErrorCode ec = U_ZERO_ERROR; 211ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru cache = uhash_open(uhash_hashLong, uhash_compareLong, NULL, &ec); 21285bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho if (U_FAILURE(ec)) { 21385bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho return; 21485bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho } 215ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru uhash_setValueDeleter(cache, _deleteTransliterator); 216ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru} 217ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 218ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru/** 219ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru * Transliterator API. 220ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru */ 221ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste QueruTransliterator* AnyTransliterator::clone() const { 222ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru return new AnyTransliterator(*this); 223ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru} 224ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 225ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru/** 226ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru * Implements {@link Transliterator#handleTransliterate}. 
227ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru */ 228ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queruvoid AnyTransliterator::handleTransliterate(Replaceable& text, UTransPosition& pos, 229ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru UBool isIncremental) const { 230ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru int32_t allStart = pos.start; 231ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru int32_t allLimit = pos.limit; 232ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 233ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru ScriptRunIterator it(text, pos.contextStart, pos.contextLimit); 234ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 235ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru while (it.next()) { 236ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru // Ignore runs in the ante context 237ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru if (it.limit <= allStart) continue; 238ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 239ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru // Try to instantiate transliterator from it.scriptCode to 240ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru // our target or target/variant 241ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru Transliterator* t = getTransliterator(it.scriptCode); 242ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 243ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru if (t == NULL) { 244ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru // We have no transliterator. Do nothing, but keep 245ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru // pos.start up to date. 
246ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru pos.start = it.limit; 247ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru continue; 248ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru } 249ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 250ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru // If the run end is before the transliteration limit, do 251ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru // a non-incremental transliteration. Otherwise do an 252ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru // incremental one. 253ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru UBool incremental = isIncremental && (it.limit >= allLimit); 254ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 255ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru pos.start = uprv_max(allStart, it.start); 256ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru pos.limit = uprv_min(allLimit, it.limit); 257ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru int32_t limit = pos.limit; 258ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru t->filteredTransliterate(text, pos, incremental); 259ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru int32_t delta = pos.limit - limit; 260ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru allLimit += delta; 261ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru it.adjustLimit(delta); 262ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 263ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru // We're done if we enter the post context 264ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru if (it.limit >= allLimit) break; 265ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru } 266ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 267ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru // Restore limit. 
pos.start is fine where the last transliterator 268ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru // left it, or at the end of the last run. 269ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru pos.limit = allLimit; 270ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru} 271ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 272ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste QueruTransliterator* AnyTransliterator::getTransliterator(UScriptCode source) const { 273ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 274ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru if (source == targetScript || source == USCRIPT_INVALID_CODE) { 275ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru return NULL; 276ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru } 277ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 278ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru Transliterator* t = (Transliterator*) uhash_iget(cache, (int32_t) source); 279ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru if (t == NULL) { 280ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru UErrorCode ec = U_ZERO_ERROR; 281ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru UnicodeString sourceName(uscript_getName(source), -1, US_INV); 282ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru UnicodeString id(sourceName); 283ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru id.append(TARGET_SEP).append(target); 284ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 285ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru t = Transliterator::createInstance(id, UTRANS_FORWARD, ec); 286ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru if (U_FAILURE(ec) || t == NULL) { 287ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru delete t; 288ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 289ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru // 
Try to pivot around Latin, our most common script 290ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru id = sourceName; 291ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru id.append(LATIN_PIVOT).append(target); 292ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru t = Transliterator::createInstance(id, UTRANS_FORWARD, ec); 293ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru if (U_FAILURE(ec) || t == NULL) { 294ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru delete t; 295ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru t = NULL; 296ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru } 297ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru } 298ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 299ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru if (t != NULL) { 300ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru uhash_iput(cache, (int32_t) source, t, &ec); 301ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru } 302ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru } 303ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 304ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru return t; 305ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru} 306ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 307ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru/** 308ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru * Return the script code for a given name, or -1 if not found. 
309ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru */ 310ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Querustatic UScriptCode scriptNameToCode(const UnicodeString& name) { 311ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru char buf[128]; 312ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru UScriptCode code; 313ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru UErrorCode ec = U_ZERO_ERROR; 314ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru int32_t nameLen = name.length(); 315ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru UBool isInvariant = uprv_isInvariantUString(name.getBuffer(), nameLen); 316ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 317ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru if (isInvariant) { 318ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru name.extract(0, nameLen, buf, (int32_t)sizeof(buf), US_INV); 319ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru buf[127] = 0; // Make sure that we NULL terminate the string. 320ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru } 321ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru if (!isInvariant || uscript_getCode(buf, &code, 1, &ec) != 1 || U_FAILURE(ec)) 322ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru { 323ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru code = USCRIPT_INVALID_CODE; 324ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru } 325ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru return code; 326ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru} 327ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 328ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru/** 329ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru * Registers standard transliterators with the system. Called by 330ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru * Transliterator during initialization. 
Scan all current targets and 331ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru * register those that are scripts T as Any-T/V. 332ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru */ 333ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queruvoid AnyTransliterator::registerIDs() { 334ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 335ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru UErrorCode ec = U_ZERO_ERROR; 336ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru Hashtable seen(TRUE, ec); 337ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 338ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru int32_t sourceCount = Transliterator::_countAvailableSources(); 339ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru for (int32_t s=0; s<sourceCount; ++s) { 340ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru UnicodeString source; 341ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru Transliterator::_getAvailableSource(s, source); 342ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 343ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru // Ignore the "Any" source 344ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru if (source.caseCompare(ANY, 0 /*U_FOLD_CASE_DEFAULT*/) == 0) continue; 345ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 346ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru int32_t targetCount = Transliterator::_countAvailableTargets(source); 347ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru for (int32_t t=0; t<targetCount; ++t) { 348ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru UnicodeString target; 349ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru Transliterator::_getAvailableTarget(t, source, target); 350ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 351ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru // Only process each target once 
352ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru if (seen.geti(target) != 0) continue; 353ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru ec = U_ZERO_ERROR; 354ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru seen.puti(target, 1, ec); 355ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 356ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru // Get the script code for the target. If not a script, ignore. 357ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru UScriptCode targetScript = scriptNameToCode(target); 358ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru if (targetScript == USCRIPT_INVALID_CODE) continue; 359ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 360ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru int32_t variantCount = Transliterator::_countAvailableVariants(source, target); 361ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru // assert(variantCount >= 1); 362ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru for (int32_t v=0; v<variantCount; ++v) { 363ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru UnicodeString variant; 364ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru Transliterator::_getAvailableVariant(v, source, target, variant); 365ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 366ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru UnicodeString id; 367ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru TransliteratorIDParser::STVtoID(ANY, target, variant, id); 368ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru ec = U_ZERO_ERROR; 369ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru AnyTransliterator* t = new AnyTransliterator(id, target, variant, 370ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru targetScript, ec); 371ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru if (U_FAILURE(ec)) { 372ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 
delete t; 373ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru } else { 374ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru Transliterator::_registerInstance(t); 375ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru Transliterator::_registerSpecialInverse(target, NULL_ID, FALSE); 376ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru } 377ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru } 378ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru } 379ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru } 380ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru} 381ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 382ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste QueruU_NAMESPACE_END 383ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 384ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru#endif /* #if !UCONFIG_NO_TRANSLITERATION */ 385ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 386ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru//eof 387