/*
*****************************************************************
* Copyright (c) 2002-2014, International Business Machines Corporation
* and others.  All Rights Reserved.
*****************************************************************
* Date        Name        Description
* 06/06/2002  aliu        Creation.
*****************************************************************
*/

#include "unicode/utypes.h"

#if !UCONFIG_NO_TRANSLITERATION

#include "unicode/uobject.h"
#include "unicode/uscript.h"

#include "anytrans.h"
#include "hash.h"
#include "mutex.h"
#include "nultrans.h"
#include "putilimp.h"
#include "tridpars.h"
Queru#include "uinvchar.h" 25fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius#include "uvector.h" 26ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 27ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru//------------------------------------------------------------ 28ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru// Constants 29ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 30ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Querustatic const UChar TARGET_SEP = 45; // '-' 31ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Querustatic const UChar VARIANT_SEP = 47; // '/' 32ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Querustatic const UChar ANY[] = {65,110,121,0}; // "Any" 33ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Querustatic const UChar NULL_ID[] = {78,117,108,108,0}; // "Null" 34ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Querustatic const UChar LATIN_PIVOT[] = {45,76,97,116,105,110,59,76,97,116,105,110,45,0}; // "-Latin;Latin-" 35ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 36ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru//------------------------------------------------------------ 37ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 38ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste QueruU_CDECL_BEGIN 39ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru/** 40ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru * Deleter function for Transliterator*. 
41ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru */ 42ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Querustatic void U_CALLCONV 43ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru_deleteTransliterator(void *obj) { 44fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius delete (icu::Transliterator*) obj; 45ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru} 46ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste QueruU_CDECL_END 47ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 48ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru//------------------------------------------------------------ 49ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 50ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste QueruU_NAMESPACE_BEGIN 51ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 52ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru//------------------------------------------------------------ 53ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru// ScriptRunIterator 54ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 55ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru/** 56ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru * Returns a series of ranges corresponding to scripts. They will be 57ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru * of the form: 58ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru * 59ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru * ccccSScSSccccTTcTcccc - c = common, S = first script, T = second 60ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru * | | - first run (start, limit) 61ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru * | | - second run (start, limit) 62ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru * 63ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru * That is, the runs will overlap. 
The reason for this is so that a 64ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru * transliterator can consider common characters both before and after 65ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru * the scripts. 66ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru */ 67ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queruclass ScriptRunIterator : public UMemory { 68ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queruprivate: 69ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru const Replaceable& text; 70ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru int32_t textStart; 71ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru int32_t textLimit; 72ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 73ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Querupublic: 74ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru /** 75ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru * The code of the current run, valid after next() returns. May 76ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru * be USCRIPT_INVALID_CODE if and only if the entire text is 77ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru * COMMON/INHERITED. 78ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru */ 79ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru UScriptCode scriptCode; 80ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 81ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru /** 82ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru * The start of the run, inclusive, valid after next() returns. 
83ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru */ 84ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru int32_t start; 85ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 86ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru /** 87ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru * The end of the run, exclusive, valid after next() returns. 88ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru */ 89ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru int32_t limit; 90fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius 91ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru /** 92ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru * Constructs a run iterator over the given text from start 93ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru * (inclusive) to limit (exclusive). 94ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru */ 95ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru ScriptRunIterator(const Replaceable& text, int32_t start, int32_t limit); 96ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 97ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru /** 98ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru * Returns TRUE if there are any more runs. TRUE is always 99ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru * returned at least once. Upon return, the caller should 100ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru * examine scriptCode, start, and limit. 
101ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru */ 102ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru UBool next(); 103ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 104ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru /** 105ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru * Adjusts internal indices for a change in the limit index of the 106ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru * given delta. A positive delta means the limit has increased. 107ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru */ 108ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru void adjustLimit(int32_t delta); 109ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 110ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queruprivate: 111ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru ScriptRunIterator(const ScriptRunIterator &other); // forbid copying of this class 112ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru ScriptRunIterator &operator=(const ScriptRunIterator &other); // forbid copying of this class 113ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru}; 114ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 115ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste QueruScriptRunIterator::ScriptRunIterator(const Replaceable& theText, 116ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru int32_t myStart, int32_t myLimit) : 117ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru text(theText) 118ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru{ 119ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru textStart = myStart; 120ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru textLimit = myLimit; 121ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru limit = myStart; 122ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru} 123ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 
124ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste QueruUBool ScriptRunIterator::next() { 125ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru UChar32 ch; 126ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru UScriptCode s; 127ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru UErrorCode ec = U_ZERO_ERROR; 128ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 129ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru scriptCode = USCRIPT_INVALID_CODE; // don't know script yet 130ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru start = limit; 131ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 132ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru // Are we done? 133ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru if (start == textLimit) { 134ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru return FALSE; 135ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru } 136ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 137ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru // Move start back to include adjacent COMMON or INHERITED 138ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru // characters 139ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru while (start > textStart) { 140ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru ch = text.char32At(start - 1); // look back 141ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru s = uscript_getScript(ch, &ec); 142ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru if (s == USCRIPT_COMMON || s == USCRIPT_INHERITED) { 143ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru --start; 144ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru } else { 145ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru break; 146ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru } 147ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru } 
148ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 149ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru // Move limit ahead to include COMMON, INHERITED, and characters 150ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru // of the current script. 151ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru while (limit < textLimit) { 152ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru ch = text.char32At(limit); // look ahead 153ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru s = uscript_getScript(ch, &ec); 154ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru if (s != USCRIPT_COMMON && s != USCRIPT_INHERITED) { 155ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru if (scriptCode == USCRIPT_INVALID_CODE) { 156ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru scriptCode = s; 157ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru } else if (s != scriptCode) { 158ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru break; 159ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru } 160ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru } 161ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru ++limit; 162ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru } 163ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 164ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru // Return TRUE even if the entire text is COMMON / INHERITED, in 165ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru // which case scriptCode will be USCRIPT_INVALID_CODE. 
166ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru return TRUE; 167ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru} 168ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 169ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queruvoid ScriptRunIterator::adjustLimit(int32_t delta) { 170ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru limit += delta; 171ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru textLimit += delta; 172ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru} 173ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 174ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru//------------------------------------------------------------ 175ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru// AnyTransliterator 176ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 177ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste QueruUOBJECT_DEFINE_RTTI_IMPLEMENTATION(AnyTransliterator) 178ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 179ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste QueruAnyTransliterator::AnyTransliterator(const UnicodeString& id, 180ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru const UnicodeString& theTarget, 181ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru const UnicodeString& theVariant, 182ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru UScriptCode theTargetScript, 183ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru UErrorCode& ec) : 184ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru Transliterator(id, NULL), 185fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius targetScript(theTargetScript) 186ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru{ 187ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru cache = uhash_open(uhash_hashLong, uhash_compareLong, NULL, &ec); 18885bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho if (U_FAILURE(ec)) { 
18985bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho return; 19085bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho } 191ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru uhash_setValueDeleter(cache, _deleteTransliterator); 192ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 193ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru target = theTarget; 194ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru if (theVariant.length() > 0) { 195ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru target.append(VARIANT_SEP).append(theVariant); 196ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru } 197ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru} 198ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 199ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste QueruAnyTransliterator::~AnyTransliterator() { 200ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru uhash_close(cache); 201ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru} 202ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 203ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru/** 204ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru * Copy constructor. 
205ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru */ 206ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste QueruAnyTransliterator::AnyTransliterator(const AnyTransliterator& o) : 207ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru Transliterator(o), 208ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru target(o.target), 209ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru targetScript(o.targetScript) 210ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru{ 211ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru // Don't copy the cache contents 212ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru UErrorCode ec = U_ZERO_ERROR; 213ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru cache = uhash_open(uhash_hashLong, uhash_compareLong, NULL, &ec); 21485bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho if (U_FAILURE(ec)) { 21585bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho return; 21685bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho } 217ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru uhash_setValueDeleter(cache, _deleteTransliterator); 218ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru} 219ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 220ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru/** 221ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru * Transliterator API. 222ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru */ 223ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste QueruTransliterator* AnyTransliterator::clone() const { 224ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru return new AnyTransliterator(*this); 225ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru} 226ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 227ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru/** 228ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru * Implements {@link Transliterator#handleTransliterate}. 
229ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru */ 230ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queruvoid AnyTransliterator::handleTransliterate(Replaceable& text, UTransPosition& pos, 231ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru UBool isIncremental) const { 232ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru int32_t allStart = pos.start; 233ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru int32_t allLimit = pos.limit; 234ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 235ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru ScriptRunIterator it(text, pos.contextStart, pos.contextLimit); 236ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 237ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru while (it.next()) { 238ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru // Ignore runs in the ante context 239ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru if (it.limit <= allStart) continue; 240ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 241ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru // Try to instantiate transliterator from it.scriptCode to 242ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru // our target or target/variant 243ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru Transliterator* t = getTransliterator(it.scriptCode); 244fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius 245ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru if (t == NULL) { 246ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru // We have no transliterator. Do nothing, but keep 247ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru // pos.start up to date. 
248ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru pos.start = it.limit; 249ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru continue; 250ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru } 251ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 252ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru // If the run end is before the transliteration limit, do 253ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru // a non-incremental transliteration. Otherwise do an 254ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru // incremental one. 255ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru UBool incremental = isIncremental && (it.limit >= allLimit); 256fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius 257ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru pos.start = uprv_max(allStart, it.start); 258ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru pos.limit = uprv_min(allLimit, it.limit); 259ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru int32_t limit = pos.limit; 260ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru t->filteredTransliterate(text, pos, incremental); 261ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru int32_t delta = pos.limit - limit; 262ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru allLimit += delta; 263ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru it.adjustLimit(delta); 264ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 265ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru // We're done if we enter the post context 266ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru if (it.limit >= allLimit) break; 267ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru } 268ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 269ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru // Restore limit. 
pos.start is fine where the last transliterator 270ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru // left it, or at the end of the last run. 271ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru pos.limit = allLimit; 272ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru} 273ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 274ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste QueruTransliterator* AnyTransliterator::getTransliterator(UScriptCode source) const { 275ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 276ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru if (source == targetScript || source == USCRIPT_INVALID_CODE) { 277ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru return NULL; 278ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru } 279ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 280fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius Transliterator* t = NULL; 281fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius { 282fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius Mutex m(NULL); 283fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius t = (Transliterator*) uhash_iget(cache, (int32_t) source); 284fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius } 285ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru if (t == NULL) { 286ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru UErrorCode ec = U_ZERO_ERROR; 287ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru UnicodeString sourceName(uscript_getName(source), -1, US_INV); 288ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru UnicodeString id(sourceName); 289ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru id.append(TARGET_SEP).append(target); 290fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius 291ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru t = Transliterator::createInstance(id, UTRANS_FORWARD, ec); 292ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru if 
(U_FAILURE(ec) || t == NULL) { 293ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru delete t; 294fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius 295ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru // Try to pivot around Latin, our most common script 296ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru id = sourceName; 29783a171d1a62abf406f7f44ae671823d5ec20db7dCraig Cornelius id.append(LATIN_PIVOT, -1).append(target); 298ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru t = Transliterator::createInstance(id, UTRANS_FORWARD, ec); 299ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru if (U_FAILURE(ec) || t == NULL) { 300ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru delete t; 301ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru t = NULL; 302ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru } 303ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru } 304ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 305ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru if (t != NULL) { 306fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius Transliterator *rt = NULL; 307fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius { 308fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius Mutex m(NULL); 309fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius rt = static_cast<Transliterator *> (uhash_iget(cache, (int32_t) source)); 310fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius if (rt == NULL) { 311fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius // Common case, no race to cache this new transliterator. 312fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius uhash_iput(cache, (int32_t) source, t, &ec); 313fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius } else { 314fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius // Race case, some other thread beat us to caching this transliterator. 
315fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius Transliterator *temp = rt; 316fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius rt = t; // Our newly created transliterator that lost the race & now needs deleting. 317fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius t = temp; // The transliterator from the cache that we will return. 318fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius } 319fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius } 320fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius delete rt; // will be non-null only in case of races. 321ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru } 322ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru } 323ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru return t; 324ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru} 325ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 326ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru/** 327ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru * Return the script code for a given name, or -1 if not found. 
328ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru */ 329ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Querustatic UScriptCode scriptNameToCode(const UnicodeString& name) { 330ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru char buf[128]; 331ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru UScriptCode code; 332ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru UErrorCode ec = U_ZERO_ERROR; 333ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru int32_t nameLen = name.length(); 334ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru UBool isInvariant = uprv_isInvariantUString(name.getBuffer(), nameLen); 335fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius 336ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru if (isInvariant) { 337ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru name.extract(0, nameLen, buf, (int32_t)sizeof(buf), US_INV); 338ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru buf[127] = 0; // Make sure that we NULL terminate the string. 339ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru } 340ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru if (!isInvariant || uscript_getCode(buf, &code, 1, &ec) != 1 || U_FAILURE(ec)) 341ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru { 342ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru code = USCRIPT_INVALID_CODE; 343ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru } 344ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru return code; 345ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru} 346ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 347ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru/** 348ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru * Registers standard transliterators with the system. Called by 349ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru * Transliterator during initialization. 
Scan all current targets and 350ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru * register those that are scripts T as Any-T/V. 351ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru */ 352ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queruvoid AnyTransliterator::registerIDs() { 353ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 354ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru UErrorCode ec = U_ZERO_ERROR; 355ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru Hashtable seen(TRUE, ec); 356ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 357ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru int32_t sourceCount = Transliterator::_countAvailableSources(); 358ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru for (int32_t s=0; s<sourceCount; ++s) { 359ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru UnicodeString source; 360ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru Transliterator::_getAvailableSource(s, source); 361ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 362ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru // Ignore the "Any" source 36383a171d1a62abf406f7f44ae671823d5ec20db7dCraig Cornelius if (source.caseCompare(ANY, 3, 0 /*U_FOLD_CASE_DEFAULT*/) == 0) continue; 364ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 365ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru int32_t targetCount = Transliterator::_countAvailableTargets(source); 366ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru for (int32_t t=0; t<targetCount; ++t) { 367ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru UnicodeString target; 368ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru Transliterator::_getAvailableTarget(t, source, target); 369ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 370ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru // Only process each target once 
371ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru if (seen.geti(target) != 0) continue; 372ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru ec = U_ZERO_ERROR; 373ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru seen.puti(target, 1, ec); 374fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius 375ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru // Get the script code for the target. If not a script, ignore. 376ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru UScriptCode targetScript = scriptNameToCode(target); 377ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru if (targetScript == USCRIPT_INVALID_CODE) continue; 378ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 379ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru int32_t variantCount = Transliterator::_countAvailableVariants(source, target); 380ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru // assert(variantCount >= 1); 381ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru for (int32_t v=0; v<variantCount; ++v) { 382ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru UnicodeString variant; 383ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru Transliterator::_getAvailableVariant(v, source, target, variant); 384fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius 385ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru UnicodeString id; 38683a171d1a62abf406f7f44ae671823d5ec20db7dCraig Cornelius TransliteratorIDParser::STVtoID(UnicodeString(TRUE, ANY, 3), target, variant, id); 387ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru ec = U_ZERO_ERROR; 388ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru AnyTransliterator* t = new AnyTransliterator(id, target, variant, 389ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru targetScript, ec); 390ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru if (U_FAILURE(ec)) { 391ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste 
Queru delete t; 392ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru } else { 393ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru Transliterator::_registerInstance(t); 39483a171d1a62abf406f7f44ae671823d5ec20db7dCraig Cornelius Transliterator::_registerSpecialInverse(target, UnicodeString(TRUE, NULL_ID, 4), FALSE); 395ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru } 396ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru } 397ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru } 398ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru } 399ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru} 400ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 401ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste QueruU_NAMESPACE_END 402ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 403ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru#endif /* #if !UCONFIG_NO_TRANSLITERATION */ 404ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 405ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru//eof 406