1ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru/*
2ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru*****************************************************************
3fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius* Copyright (c) 2002-2014, International Business Machines Corporation
4ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru* and others.  All Rights Reserved.
5ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru*****************************************************************
6ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru* Date        Name        Description
7ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru* 06/06/2002  aliu        Creation.
8ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru*****************************************************************
9ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru*/
10ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru
11ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru#include "unicode/utypes.h"
12ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru
13ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru#if !UCONFIG_NO_TRANSLITERATION
14ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru
15ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru#include "unicode/uobject.h"
16ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru#include "unicode/uscript.h"
17fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius
18ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru#include "anytrans.h"
19ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru#include "hash.h"
20fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius#include "mutex.h"
21fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius#include "nultrans.h"
22ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru#include "putilimp.h"
23fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius#include "tridpars.h"
24ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru#include "uinvchar.h"
25fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius#include "uvector.h"
26ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru
27ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru//------------------------------------------------------------
28ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru// Constants
29ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru
30ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Querustatic const UChar TARGET_SEP = 45; // '-'
31ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Querustatic const UChar VARIANT_SEP = 47; // '/'
32ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Querustatic const UChar ANY[] = {65,110,121,0}; // "Any"
33ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Querustatic const UChar NULL_ID[] = {78,117,108,108,0}; // "Null"
34ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Querustatic const UChar LATIN_PIVOT[] = {45,76,97,116,105,110,59,76,97,116,105,110,45,0}; // "-Latin;Latin-"
35ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru
36ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru//------------------------------------------------------------
37ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru
38ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste QueruU_CDECL_BEGIN
39ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru/**
40ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru * Deleter function for Transliterator*.
41ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru */
42ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Querustatic void U_CALLCONV
43ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru_deleteTransliterator(void *obj) {
44fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius    delete (icu::Transliterator*) obj;
45ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru}
46ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste QueruU_CDECL_END
47ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru
48ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru//------------------------------------------------------------
49ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru
50ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste QueruU_NAMESPACE_BEGIN
51ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru
52ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru//------------------------------------------------------------
53ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru// ScriptRunIterator
54ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru
55ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru/**
56ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru * Returns a series of ranges corresponding to scripts. They will be
57ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru * of the form:
58ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru *
59ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru * ccccSScSSccccTTcTcccc   - c = common, S = first script, T = second
60ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru * |            |          - first run (start, limit)
61ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru *          |           |  - second run (start, limit)
62ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru *
63ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru * That is, the runs will overlap. The reason for this is so that a
64ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru * transliterator can consider common characters both before and after
65ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru * the scripts.
66ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru */
67ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queruclass ScriptRunIterator : public UMemory {
68ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queruprivate:
69ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    const Replaceable& text;
70ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    int32_t textStart;
71ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    int32_t textLimit;
72ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru
73ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Querupublic:
74ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    /**
75ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru     * The code of the current run, valid after next() returns.  May
76ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru     * be USCRIPT_INVALID_CODE if and only if the entire text is
77ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru     * COMMON/INHERITED.
78ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru     */
79ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    UScriptCode scriptCode;
80ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru
81ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    /**
82ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru     * The start of the run, inclusive, valid after next() returns.
83ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru     */
84ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    int32_t start;
85ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru
86ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    /**
87ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru     * The end of the run, exclusive, valid after next() returns.
88ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru     */
89ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    int32_t limit;
90fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius
91ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    /**
92ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru     * Constructs a run iterator over the given text from start
93ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru     * (inclusive) to limit (exclusive).
94ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru     */
95ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    ScriptRunIterator(const Replaceable& text, int32_t start, int32_t limit);
96ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru
97ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    /**
98ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru     * Returns TRUE if there are any more runs.  TRUE is always
99ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru     * returned at least once.  Upon return, the caller should
100ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru     * examine scriptCode, start, and limit.
101ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru     */
102ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    UBool next();
103ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru
104ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    /**
105ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru     * Adjusts internal indices for a change in the limit index of the
106ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru     * given delta.  A positive delta means the limit has increased.
107ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru     */
108ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    void adjustLimit(int32_t delta);
109ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru
110ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queruprivate:
111ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    ScriptRunIterator(const ScriptRunIterator &other); // forbid copying of this class
112ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    ScriptRunIterator &operator=(const ScriptRunIterator &other); // forbid copying of this class
113ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru};
114ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru
115ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste QueruScriptRunIterator::ScriptRunIterator(const Replaceable& theText,
116ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru                                     int32_t myStart, int32_t myLimit) :
117ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    text(theText)
118ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru{
119ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    textStart = myStart;
120ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    textLimit = myLimit;
121ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    limit = myStart;
122ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru}
123ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru
124ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste QueruUBool ScriptRunIterator::next() {
125ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    UChar32 ch;
126ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    UScriptCode s;
127ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    UErrorCode ec = U_ZERO_ERROR;
128ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru
129ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    scriptCode = USCRIPT_INVALID_CODE; // don't know script yet
130ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    start = limit;
131ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru
132ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    // Are we done?
133ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    if (start == textLimit) {
134ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru        return FALSE;
135ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    }
136ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru
137ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    // Move start back to include adjacent COMMON or INHERITED
138ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    // characters
139ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    while (start > textStart) {
140ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru        ch = text.char32At(start - 1); // look back
141ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru        s = uscript_getScript(ch, &ec);
142ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru        if (s == USCRIPT_COMMON || s == USCRIPT_INHERITED) {
143ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru            --start;
144ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru        } else {
145ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru            break;
146ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru        }
147ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    }
148ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru
149ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    // Move limit ahead to include COMMON, INHERITED, and characters
150ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    // of the current script.
151ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    while (limit < textLimit) {
152ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru        ch = text.char32At(limit); // look ahead
153ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru        s = uscript_getScript(ch, &ec);
154ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru        if (s != USCRIPT_COMMON && s != USCRIPT_INHERITED) {
155ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru            if (scriptCode == USCRIPT_INVALID_CODE) {
156ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru                scriptCode = s;
157ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru            } else if (s != scriptCode) {
158ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru                break;
159ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru            }
160ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru        }
161ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru        ++limit;
162ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    }
163ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru
164ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    // Return TRUE even if the entire text is COMMON / INHERITED, in
165ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    // which case scriptCode will be USCRIPT_INVALID_CODE.
166ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    return TRUE;
167ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru}
168ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru
169ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queruvoid ScriptRunIterator::adjustLimit(int32_t delta) {
170ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    limit += delta;
171ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    textLimit += delta;
172ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru}
173ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru
174ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru//------------------------------------------------------------
175ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru// AnyTransliterator
176ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru
177ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste QueruUOBJECT_DEFINE_RTTI_IMPLEMENTATION(AnyTransliterator)
178ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru
179ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste QueruAnyTransliterator::AnyTransliterator(const UnicodeString& id,
180ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru                                     const UnicodeString& theTarget,
181ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru                                     const UnicodeString& theVariant,
182ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru                                     UScriptCode theTargetScript,
183ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru                                     UErrorCode& ec) :
184ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    Transliterator(id, NULL),
185fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius    targetScript(theTargetScript)
186ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru{
187ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    cache = uhash_open(uhash_hashLong, uhash_compareLong, NULL, &ec);
18885bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho    if (U_FAILURE(ec)) {
18985bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho        return;
19085bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho    }
191ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    uhash_setValueDeleter(cache, _deleteTransliterator);
192ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru
193ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    target = theTarget;
194ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    if (theVariant.length() > 0) {
195ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru        target.append(VARIANT_SEP).append(theVariant);
196ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    }
197ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru}
198ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru
199ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste QueruAnyTransliterator::~AnyTransliterator() {
200ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    uhash_close(cache);
201ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru}
202ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru
203ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru/**
204ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru * Copy constructor.
205ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru */
206ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste QueruAnyTransliterator::AnyTransliterator(const AnyTransliterator& o) :
207ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    Transliterator(o),
208ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    target(o.target),
209ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    targetScript(o.targetScript)
210ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru{
211ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    // Don't copy the cache contents
212ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    UErrorCode ec = U_ZERO_ERROR;
213ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    cache = uhash_open(uhash_hashLong, uhash_compareLong, NULL, &ec);
21485bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho    if (U_FAILURE(ec)) {
21585bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho        return;
21685bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho    }
217ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    uhash_setValueDeleter(cache, _deleteTransliterator);
218ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru}
219ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru
220ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru/**
221ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru * Transliterator API.
222ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru */
223ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste QueruTransliterator* AnyTransliterator::clone() const {
224ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    return new AnyTransliterator(*this);
225ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru}
226ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru
227ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru/**
228ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru * Implements {@link Transliterator#handleTransliterate}.
229ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru */
230ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queruvoid AnyTransliterator::handleTransliterate(Replaceable& text, UTransPosition& pos,
231ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru                                            UBool isIncremental) const {
232ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    int32_t allStart = pos.start;
233ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    int32_t allLimit = pos.limit;
234ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru
235ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    ScriptRunIterator it(text, pos.contextStart, pos.contextLimit);
236ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru
237ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    while (it.next()) {
238ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru        // Ignore runs in the ante context
239ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru        if (it.limit <= allStart) continue;
240ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru
241ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru        // Try to instantiate transliterator from it.scriptCode to
242ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru        // our target or target/variant
243ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru        Transliterator* t = getTransliterator(it.scriptCode);
244fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius
245ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru        if (t == NULL) {
246ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru            // We have no transliterator.  Do nothing, but keep
247ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru            // pos.start up to date.
248ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru            pos.start = it.limit;
249ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru            continue;
250ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru        }
251ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru
252ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru        // If the run end is before the transliteration limit, do
253ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru        // a non-incremental transliteration.  Otherwise do an
254ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru        // incremental one.
255ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru        UBool incremental = isIncremental && (it.limit >= allLimit);
256fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius
257ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru        pos.start = uprv_max(allStart, it.start);
258ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru        pos.limit = uprv_min(allLimit, it.limit);
259ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru        int32_t limit = pos.limit;
260ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru        t->filteredTransliterate(text, pos, incremental);
261ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru        int32_t delta = pos.limit - limit;
262ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru        allLimit += delta;
263ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru        it.adjustLimit(delta);
264ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru
265ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru        // We're done if we enter the post context
266ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru        if (it.limit >= allLimit) break;
267ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    }
268ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru
269ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    // Restore limit.  pos.start is fine where the last transliterator
270ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    // left it, or at the end of the last run.
271ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    pos.limit = allLimit;
272ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru}
273ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru
274ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste QueruTransliterator* AnyTransliterator::getTransliterator(UScriptCode source) const {
275ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru
276ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    if (source == targetScript || source == USCRIPT_INVALID_CODE) {
277ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru        return NULL;
278ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    }
279ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru
280fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius    Transliterator* t = NULL;
281fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius    {
282fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius        Mutex m(NULL);
283fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius        t = (Transliterator*) uhash_iget(cache, (int32_t) source);
284fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius    }
285ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    if (t == NULL) {
286ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru        UErrorCode ec = U_ZERO_ERROR;
287ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru        UnicodeString sourceName(uscript_getName(source), -1, US_INV);
288ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru        UnicodeString id(sourceName);
289ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru        id.append(TARGET_SEP).append(target);
290fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius
291ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru        t = Transliterator::createInstance(id, UTRANS_FORWARD, ec);
292ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru        if (U_FAILURE(ec) || t == NULL) {
293ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru            delete t;
294fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius
295ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru            // Try to pivot around Latin, our most common script
296ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru            id = sourceName;
29783a171d1a62abf406f7f44ae671823d5ec20db7dCraig Cornelius            id.append(LATIN_PIVOT, -1).append(target);
298ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru            t = Transliterator::createInstance(id, UTRANS_FORWARD, ec);
299ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru            if (U_FAILURE(ec) || t == NULL) {
300ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru                delete t;
301ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru                t = NULL;
302ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru            }
303ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru        }
304ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru
305ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru        if (t != NULL) {
306fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius            Transliterator *rt = NULL;
307fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius            {
308fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius                Mutex m(NULL);
309fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius                rt = static_cast<Transliterator *> (uhash_iget(cache, (int32_t) source));
310fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius                if (rt == NULL) {
311fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius                    // Common case, no race to cache this new transliterator.
312fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius                    uhash_iput(cache, (int32_t) source, t, &ec);
313fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius                } else {
314fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius                    // Race case, some other thread beat us to caching this transliterator.
315fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius                    Transliterator *temp = rt;
316fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius                    rt = t;    // Our newly created transliterator that lost the race & now needs deleting.
317fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius                    t  = temp; // The transliterator from the cache that we will return.
318fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius                }
319fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius            }
320fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius            delete rt;    // will be non-null only in case of races.
321ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru        }
322ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    }
323ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    return t;
324ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru}
325ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru
326ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru/**
327ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru * Return the script code for a given name, or -1 if not found.
328ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru */
329ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Querustatic UScriptCode scriptNameToCode(const UnicodeString& name) {
330ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    char buf[128];
331ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    UScriptCode code;
332ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    UErrorCode ec = U_ZERO_ERROR;
333ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    int32_t nameLen = name.length();
334ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    UBool isInvariant = uprv_isInvariantUString(name.getBuffer(), nameLen);
335fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius
336ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    if (isInvariant) {
337ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru        name.extract(0, nameLen, buf, (int32_t)sizeof(buf), US_INV);
338ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru        buf[127] = 0;   // Make sure that we NULL terminate the string.
339ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    }
340ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    if (!isInvariant || uscript_getCode(buf, &code, 1, &ec) != 1 || U_FAILURE(ec))
341ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    {
342ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru        code = USCRIPT_INVALID_CODE;
343ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    }
344ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    return code;
345ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru}
346ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru
347ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru/**
348ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru * Registers standard transliterators with the system.  Called by
349ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru * Transliterator during initialization.  Scan all current targets and
350ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru * register those that are scripts T as Any-T/V.
351ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru */
352ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queruvoid AnyTransliterator::registerIDs() {
353ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru
354ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    UErrorCode ec = U_ZERO_ERROR;
355ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    Hashtable seen(TRUE, ec);
356ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru
357ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    int32_t sourceCount = Transliterator::_countAvailableSources();
358ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    for (int32_t s=0; s<sourceCount; ++s) {
359ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru        UnicodeString source;
360ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru        Transliterator::_getAvailableSource(s, source);
361ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru
362ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru        // Ignore the "Any" source
36383a171d1a62abf406f7f44ae671823d5ec20db7dCraig Cornelius        if (source.caseCompare(ANY, 3, 0 /*U_FOLD_CASE_DEFAULT*/) == 0) continue;
364ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru
365ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru        int32_t targetCount = Transliterator::_countAvailableTargets(source);
366ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru        for (int32_t t=0; t<targetCount; ++t) {
367ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru            UnicodeString target;
368ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru            Transliterator::_getAvailableTarget(t, source, target);
369ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru
370ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru            // Only process each target once
371ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru            if (seen.geti(target) != 0) continue;
372ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru            ec = U_ZERO_ERROR;
373ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru            seen.puti(target, 1, ec);
374fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius
375ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru            // Get the script code for the target.  If not a script, ignore.
376ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru            UScriptCode targetScript = scriptNameToCode(target);
377ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru            if (targetScript == USCRIPT_INVALID_CODE) continue;
378ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru
379ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru            int32_t variantCount = Transliterator::_countAvailableVariants(source, target);
380ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru            // assert(variantCount >= 1);
381ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru            for (int32_t v=0; v<variantCount; ++v) {
382ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru                UnicodeString variant;
383ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru                Transliterator::_getAvailableVariant(v, source, target, variant);
384fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius
385ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru                UnicodeString id;
38683a171d1a62abf406f7f44ae671823d5ec20db7dCraig Cornelius                TransliteratorIDParser::STVtoID(UnicodeString(TRUE, ANY, 3), target, variant, id);
387ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru                ec = U_ZERO_ERROR;
388ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru                AnyTransliterator* t = new AnyTransliterator(id, target, variant,
389ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru                                                             targetScript, ec);
390ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru                if (U_FAILURE(ec)) {
391ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru                    delete t;
392ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru                } else {
393ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru                    Transliterator::_registerInstance(t);
39483a171d1a62abf406f7f44ae671823d5ec20db7dCraig Cornelius                    Transliterator::_registerSpecialInverse(target, UnicodeString(TRUE, NULL_ID, 4), FALSE);
395ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru                }
396ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru            }
397ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru        }
398ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    }
399ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru}
400ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru
401ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste QueruU_NAMESPACE_END
402ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru
403ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru#endif /* #if !UCONFIG_NO_TRANSLITERATION */
404ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru
405ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru//eof
406