1b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru/*
2b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru**********************************************************************
3f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius*   Copyright (c) 2002-2014, International Business Machines Corporation
4b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru*   and others.  All Rights Reserved.
5b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru**********************************************************************
6b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru*   Date        Name        Description
7b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru*   01/14/2002  aliu        Creation.
8b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru**********************************************************************
9b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru*/
10b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
11b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru#include "unicode/utypes.h"
12b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
13b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru#if !UCONFIG_NO_TRANSLITERATION
14b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
15b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru#include "tridpars.h"
16b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru#include "hash.h"
17b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru#include "mutex.h"
18f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius#include "transreg.h"
19f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius#include "uassert.h"
20b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru#include "ucln_in.h"
21b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru#include "unicode/parsepos.h"
22b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru#include "unicode/translit.h"
23b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru#include "unicode/uchar.h"
24b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru#include "unicode/uniset.h"
25b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru#include "unicode/unistr.h"
26b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru#include "unicode/utrans.h"
27b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru#include "util.h"
28b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru#include "uvector.h"
29b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
30b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste QueruU_NAMESPACE_BEGIN
31b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
32b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Querustatic const UChar ID_DELIM    = 0x003B; // ;
33b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Querustatic const UChar TARGET_SEP  = 0x002D; // -
34b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Querustatic const UChar VARIANT_SEP = 0x002F; // /
35b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Querustatic const UChar OPEN_REV    = 0x0028; // (
36b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Querustatic const UChar CLOSE_REV   = 0x0029; // )
37b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
38b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru//static const UChar EMPTY[]     = {0}; // ""
39b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Querustatic const UChar ANY[]       = {65,110,121,0}; // "Any"
40b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Querustatic const UChar ANY_NULL[]  = {65,110,121,45,78,117,108,108,0}; // "Any-Null"
41b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
42b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Querustatic const int32_t FORWARD = UTRANS_FORWARD;
43b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Querustatic const int32_t REVERSE = UTRANS_REVERSE;
44b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
45b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Querustatic Hashtable* SPECIAL_INVERSES = NULL;
46f9878a236aa0d9662d8e40cafdaf2e04cd615835ccorneliusstatic UInitOnce gSpecialInversesInitOnce = U_INITONCE_INITIALIZER;
47b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
48b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru/**
49b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * The mutex controlling access to SPECIAL_INVERSES
50b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru */
5154dcd9b6a06071f647dac967e9e267abb9410720Craig Corneliusstatic UMutex LOCK = U_MUTEX_INITIALIZER;
52b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
53b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste QueruTransliteratorIDParser::Specs::Specs(const UnicodeString& s, const UnicodeString& t,
54b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                                     const UnicodeString& v, UBool sawS,
55b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                                     const UnicodeString& f) {
56b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    source = s;
57b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    target = t;
58b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    variant = v;
59b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    sawSource = sawS;
60b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    filter = f;
61b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru}
62b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
63b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste QueruTransliteratorIDParser::SingleID::SingleID(const UnicodeString& c, const UnicodeString& b,
64b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                                           const UnicodeString& f) {
65b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    canonID = c;
66b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    basicID = b;
67b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    filter = f;
68b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru}
69b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
70b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste QueruTransliteratorIDParser::SingleID::SingleID(const UnicodeString& c, const UnicodeString& b) {
71b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    canonID = c;
72b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    basicID = b;
73b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru}
74b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
75b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste QueruTransliterator* TransliteratorIDParser::SingleID::createInstance() {
76b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    Transliterator* t;
77b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    if (basicID.length() == 0) {
78103e9ffba2cba345d0078eb8b8db33249f81840aCraig Cornelius        t = createBasicInstance(UnicodeString(TRUE, ANY_NULL, 8), &canonID);
79b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    } else {
80b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        t = createBasicInstance(basicID, &canonID);
81b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    }
82b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    if (t != NULL) {
83b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        if (filter.length() != 0) {
84b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru            UErrorCode ec = U_ZERO_ERROR;
85b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru            UnicodeSet *set = new UnicodeSet(filter, ec);
86b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru            if (U_FAILURE(ec)) {
87b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                delete set;
88b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru            } else {
89b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                t->adoptFilter(set);
90b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru            }
91b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        }
92b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    }
93b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    return t;
94b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru}
95b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
96b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
97b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru/**
98b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * Parse a single ID, that is, an ID of the general form
99b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * "[f1] s1-t1/v1 ([f2] s2-t3/v2)", with the parenthesized element
100b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * optional, the filters optional, and the variants optional.
101b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * @param id the id to be parsed
102b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * @param pos INPUT-OUTPUT parameter.  On input, the position of
103b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * the first character to parse.  On output, the position after
104b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * the last character parsed.
105b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * @param dir the direction.  If the direction is REVERSE then the
106b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * SingleID is constructed for the reverse direction.
107b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * @return a SingleID object or NULL
108b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru */
109b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste QueruTransliteratorIDParser::SingleID*
110b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste QueruTransliteratorIDParser::parseSingleID(const UnicodeString& id, int32_t& pos,
111b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                                      int32_t dir, UErrorCode& status) {
112b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
113b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    int32_t start = pos;
114b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
115b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    // The ID will be of the form A, A(), A(B), or (B), where
116b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    // A and B are filter IDs.
117b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    Specs* specsA = NULL;
118b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    Specs* specsB = NULL;
119b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    UBool sawParen = FALSE;
120b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
121b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    // On the first pass, look for (B) or ().  If this fails, then
122b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    // on the second pass, look for A, A(B), or A().
123b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    for (int32_t pass=1; pass<=2; ++pass) {
124b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        if (pass == 2) {
125b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru            specsA = parseFilterID(id, pos, TRUE);
126b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru            if (specsA == NULL) {
127b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                pos = start;
128b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                return NULL;
129b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru            }
130b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        }
131b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        if (ICU_Utility::parseChar(id, pos, OPEN_REV)) {
132b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru            sawParen = TRUE;
133b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru            if (!ICU_Utility::parseChar(id, pos, CLOSE_REV)) {
134b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                specsB = parseFilterID(id, pos, TRUE);
135b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                // Must close with a ')'
136b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                if (specsB == NULL || !ICU_Utility::parseChar(id, pos, CLOSE_REV)) {
137b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                    delete specsA;
138b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                    pos = start;
139b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                    return NULL;
140b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                }
141b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru            }
142b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru            break;
143b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        }
144b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    }
145b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
146b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    // Assemble return results
147b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    SingleID* single;
148b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    if (sawParen) {
149b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        if (dir == FORWARD) {
150b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru            SingleID* b = specsToID(specsB, FORWARD);
151b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru            single = specsToID(specsA, FORWARD);
152c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru            // Null pointers check
153c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru            if (b == NULL || single == NULL) {
154c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru            	delete b;
155c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru            	delete single;
156c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru            	status = U_MEMORY_ALLOCATION_ERROR;
157c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru            	return NULL;
158c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru            }
159b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru            single->canonID.append(OPEN_REV)
160b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                .append(b->canonID).append(CLOSE_REV);
161b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru            if (specsA != NULL) {
162b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                single->filter = specsA->filter;
163b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru            }
164b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru            delete b;
165b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        } else {
166b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru            SingleID* a = specsToID(specsA, FORWARD);
167b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru            single = specsToID(specsB, FORWARD);
168c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru            // Check for null pointer.
169c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru            if (a == NULL || single == NULL) {
170c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru            	delete a;
171c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru            	delete single;
172c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru            	status = U_MEMORY_ALLOCATION_ERROR;
173c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru            	return NULL;
174c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru            }
175b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru            single->canonID.append(OPEN_REV)
176b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                .append(a->canonID).append(CLOSE_REV);
177b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru            if (specsB != NULL) {
178b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                single->filter = specsB->filter;
179b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru            }
180b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru            delete a;
181b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        }
182b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    } else {
183b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        // assert(specsA != NULL);
184b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        if (dir == FORWARD) {
185b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru            single = specsToID(specsA, FORWARD);
186b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        } else {
187b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru            single = specsToSpecialInverse(*specsA, status);
188b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru            if (single == NULL) {
189b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                single = specsToID(specsA, REVERSE);
190b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru            }
191b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        }
192c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru        // Check for NULL pointer
193c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru        if (single == NULL) {
194c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru        	status = U_MEMORY_ALLOCATION_ERROR;
195c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru        	return NULL;
196c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru        }
197b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        single->filter = specsA->filter;
198b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    }
199b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
200b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    delete specsA;
201b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    delete specsB;
202b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
203b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    return single;
204b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru}
205b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
206b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru/**
207b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * Parse a filter ID, that is, an ID of the general form
208b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * "[f1] s1-t1/v1", with the filters optional, and the variants optional.
209b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * @param id the id to be parsed
210b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * @param pos INPUT-OUTPUT parameter.  On input, the position of
211b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * the first character to parse.  On output, the position after
212b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * the last character parsed.
213b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * @return a SingleID object or null if the parse fails
214b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru */
215b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste QueruTransliteratorIDParser::SingleID*
216b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste QueruTransliteratorIDParser::parseFilterID(const UnicodeString& id, int32_t& pos) {
217b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
218b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    int32_t start = pos;
219b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
220b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    Specs* specs = parseFilterID(id, pos, TRUE);
221b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    if (specs == NULL) {
222b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        pos = start;
223b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        return NULL;
224b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    }
225b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
226b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    // Assemble return results
227b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    SingleID* single = specsToID(specs, FORWARD);
228c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru    if (single != NULL) {
229c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru        single->filter = specs->filter;
230c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru    }
231b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    delete specs;
232b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    return single;
233b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru}
234b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
235b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru/**
236b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * Parse a global filter of the form "[f]" or "([f])", depending
237b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * on 'withParens'.
238b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * @param id the pattern the parse
239b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * @param pos INPUT-OUTPUT parameter.  On input, the position of
240b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * the first character to parse.  On output, the position after
241b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * the last character parsed.
242b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * @param dir the direction.
243b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * @param withParens INPUT-OUTPUT parameter.  On entry, if
244b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * withParens is 0, then parens are disallowed.  If it is 1,
245b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * then parens are requires.  If it is -1, then parens are
246b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * optional, and the return result will be set to 0 or 1.
247b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * @param canonID OUTPUT parameter.  The pattern for the filter
248b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * added to the canonID, either at the end, if dir is FORWARD, or
249b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * at the start, if dir is REVERSE.  The pattern will be enclosed
250b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * in parentheses if appropriate, and will be suffixed with an
251b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * ID_DELIM character.  May be NULL.
252b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * @return a UnicodeSet object or NULL.  A non-NULL results
253b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * indicates a successful parse, regardless of whether the filter
254b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * applies to the given direction.  The caller should discard it
255b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * if withParens != (dir == REVERSE).
256b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru */
257b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste QueruUnicodeSet* TransliteratorIDParser::parseGlobalFilter(const UnicodeString& id, int32_t& pos,
258b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                                                      int32_t dir,
259b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                                                      int32_t& withParens,
260b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                                                      UnicodeString* canonID) {
261b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    UnicodeSet* filter = NULL;
262b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    int32_t start = pos;
263b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
264b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    if (withParens == -1) {
265b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        withParens = ICU_Utility::parseChar(id, pos, OPEN_REV) ? 1 : 0;
266b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    } else if (withParens == 1) {
267b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        if (!ICU_Utility::parseChar(id, pos, OPEN_REV)) {
268b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru            pos = start;
269b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru            return NULL;
270b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        }
271b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    }
272b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
273b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    ICU_Utility::skipWhitespace(id, pos, TRUE);
274b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
275b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    if (UnicodeSet::resemblesPattern(id, pos)) {
276b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        ParsePosition ppos(pos);
277b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        UErrorCode ec = U_ZERO_ERROR;
278b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        filter = new UnicodeSet(id, ppos, USET_IGNORE_SPACE, NULL, ec);
279b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        /* test for NULL */
280b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        if (filter == 0) {
281b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru            pos = start;
282b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru            return 0;
283b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        }
284b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        if (U_FAILURE(ec)) {
285b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru            delete filter;
286b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru            pos = start;
287b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru            return NULL;
288b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        }
289b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
290b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        UnicodeString pattern;
291b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        id.extractBetween(pos, ppos.getIndex(), pattern);
292b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        pos = ppos.getIndex();
293b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
294b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        if (withParens == 1 && !ICU_Utility::parseChar(id, pos, CLOSE_REV)) {
295b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru            pos = start;
296b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru            return NULL;
297b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        }
298b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
299b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        // In the forward direction, append the pattern to the
300b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        // canonID.  In the reverse, insert it at zero, and invert
301b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        // the presence of parens ("A" <-> "(A)").
302b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        if (canonID != NULL) {
303b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru            if (dir == FORWARD) {
304b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                if (withParens == 1) {
305b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                    pattern.insert(0, OPEN_REV);
306b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                    pattern.append(CLOSE_REV);
307b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                }
308b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                canonID->append(pattern).append(ID_DELIM);
309b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru            } else {
310b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                if (withParens == 0) {
311b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                    pattern.insert(0, OPEN_REV);
312b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                    pattern.append(CLOSE_REV);
313b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                }
314b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                canonID->insert(0, pattern);
315b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                canonID->insert(pattern.length(), ID_DELIM);
316b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru            }
317b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        }
318b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    }
319b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
320b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    return filter;
321b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru}
322b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
323b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste QueruU_CDECL_BEGIN
324b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Querustatic void U_CALLCONV _deleteSingleID(void* obj) {
325b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    delete (TransliteratorIDParser::SingleID*) obj;
326b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru}
327b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
328b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Querustatic void U_CALLCONV _deleteTransliteratorTrIDPars(void* obj) {
329b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    delete (Transliterator*) obj;
330b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru}
331b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste QueruU_CDECL_END
332b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
333b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru/**
334b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * Parse a compound ID, consisting of an optional forward global
335b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * filter, a separator, one or more single IDs delimited by
 * separators, and an optional reverse global filter.  The
337b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * separator is a semicolon.  The global filters are UnicodeSet
338b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * patterns.  The reverse global filter must be enclosed in
339b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * parentheses.
 * @param id the pattern to parse
341b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * @param dir the direction.
342b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * @param canonID OUTPUT parameter that receives the canonical ID,
343b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * consisting of canonical IDs for all elements, as returned by
344b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * parseSingleID(), separated by semicolons.  Previous contents
345b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * are discarded.
346b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * @param list OUTPUT parameter that receives a list of SingleID
347b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * objects representing the parsed IDs.  Previous contents are
348b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * discarded.
349b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * @param globalFilter OUTPUT parameter that receives a pointer to
350b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * a newly created global filter for this ID in this direction, or
351b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * NULL if there is none.
352b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * @return TRUE if the parse succeeds, that is, if the entire
353b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * id is consumed without syntax error.
354b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru */
355b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste QueruUBool TransliteratorIDParser::parseCompoundID(const UnicodeString& id, int32_t dir,
356b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                                              UnicodeString& canonID,
357b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                                              UVector& list,
358b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                                              UnicodeSet*& globalFilter) {
359b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    UErrorCode ec = U_ZERO_ERROR;
360b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    int32_t i;
361b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    int32_t pos = 0;
362b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    int32_t withParens = 1;
363b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    list.removeAllElements();
364b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    UnicodeSet* filter;
365b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    globalFilter = NULL;
366b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    canonID.truncate(0);
367b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
368b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    // Parse leading global filter, if any
369b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    withParens = 0; // parens disallowed
370b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    filter = parseGlobalFilter(id, pos, dir, withParens, &canonID);
371b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    if (filter != NULL) {
372b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        if (!ICU_Utility::parseChar(id, pos, ID_DELIM)) {
373b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru            // Not a global filter; backup and resume
374b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru            canonID.truncate(0);
375b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru            pos = 0;
376b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        }
377b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        if (dir == FORWARD) {
378b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru            globalFilter = filter;
379b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        } else {
380b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru            delete filter;
381b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        }
382b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        filter = NULL;
383b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    }
384b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
385b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    UBool sawDelimiter = TRUE;
386b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    for (;;) {
387b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        SingleID* single = parseSingleID(id, pos, dir, ec);
388b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        if (single == NULL) {
389b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru            break;
390b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        }
391b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        if (dir == FORWARD) {
392b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru            list.addElement(single, ec);
393b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        } else {
394b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru            list.insertElementAt(single, 0, ec);
395b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        }
396b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        if (U_FAILURE(ec)) {
397b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru            goto FAIL;
398b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        }
399b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        if (!ICU_Utility::parseChar(id, pos, ID_DELIM)) {
400b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru            sawDelimiter = FALSE;
401b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru            break;
402b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        }
403b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    }
404b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
405b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    if (list.size() == 0) {
406b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        goto FAIL;
407b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    }
408b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
409b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    // Construct canonical ID
410b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    for (i=0; i<list.size(); ++i) {
411b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        SingleID* single = (SingleID*) list.elementAt(i);
412b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        canonID.append(single->canonID);
413b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        if (i != (list.size()-1)) {
414b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru            canonID.append(ID_DELIM);
415b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        }
416b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    }
417b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
418b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    // Parse trailing global filter, if any, and only if we saw
419b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    // a trailing delimiter after the IDs.
420b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    if (sawDelimiter) {
421b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        withParens = 1; // parens required
422b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        filter = parseGlobalFilter(id, pos, dir, withParens, &canonID);
423b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        if (filter != NULL) {
424b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru            // Don't require trailing ';', but parse it if present
425b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru            ICU_Utility::parseChar(id, pos, ID_DELIM);
426b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
427b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru            if (dir == REVERSE) {
428b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                globalFilter = filter;
429b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru            } else {
430b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                delete filter;
431b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru            }
432b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru            filter = NULL;
433b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        }
434b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    }
435b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
436b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    // Trailing unparsed text is a syntax error
437b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    ICU_Utility::skipWhitespace(id, pos, TRUE);
438b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    if (pos != id.length()) {
439b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        goto FAIL;
440b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    }
441b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
442b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    return TRUE;
443b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
444b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru FAIL:
445b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    UObjectDeleter *save = list.setDeleter(_deleteSingleID);
446b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    list.removeAllElements();
447b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    list.setDeleter(save);
448b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    delete globalFilter;
449b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    globalFilter = NULL;
450b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    return FALSE;
451b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru}
452b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
453b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru/**
454b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * Convert the elements of the 'list' vector, which are SingleID
455b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * objects, into actual Transliterator objects.  In the course of
456b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * this, some (or all) entries may be removed.  If all entries
457b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * are removed, the NULL transliterator will be added.
458b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru *
459b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * Delete entries with empty basicIDs; these are generated by
460b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * elements like "(A)" in the forward direction, or "A()" in
461b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * the reverse.  THIS MAY RESULT IN AN EMPTY VECTOR.  Convert
462b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * SingleID entries to actual transliterators.
463b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru *
464b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * @param list vector of SingleID objects.  On exit, vector
465b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * of one or more Transliterators.
466b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * @return new value of insertIndex.  The index will shift if
467b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * there are empty items, like "(Lower)", with indices less than
468b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * insertIndex.
469b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru */
470b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queruvoid TransliteratorIDParser::instantiateList(UVector& list,
471b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                                                UErrorCode& ec) {
472b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    UVector tlist(ec);
473b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    if (U_FAILURE(ec)) {
474b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        goto RETURN;
475b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    }
476b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    tlist.setDeleter(_deleteTransliteratorTrIDPars);
477b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
478b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    Transliterator* t;
479b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    int32_t i;
480b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    for (i=0; i<=list.size(); ++i) { // [sic]: i<=list.size()
481b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        // We run the loop too long by one, so we can
482b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        // do an insert after the last element
483b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        if (i==list.size()) {
484b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru            break;
485b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        }
486b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
487b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        SingleID* single = (SingleID*) list.elementAt(i);
488b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        if (single->basicID.length() != 0) {
489b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru            t = single->createInstance();
490b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru            if (t == NULL) {
491b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                ec = U_INVALID_ID;
492b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                goto RETURN;
493b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru            }
494b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru            tlist.addElement(t, ec);
495b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru            if (U_FAILURE(ec)) {
496b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                delete t;
497b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                goto RETURN;
498b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru            }
499b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        }
500b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    }
501b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
502b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    // An empty list is equivalent to a NULL transliterator.
503b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    if (tlist.size() == 0) {
504103e9ffba2cba345d0078eb8b8db33249f81840aCraig Cornelius        t = createBasicInstance(UnicodeString(TRUE, ANY_NULL, 8), NULL);
505b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        if (t == NULL) {
506b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru            // Should never happen
507b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru            ec = U_INTERNAL_TRANSLITERATOR_ERROR;
508b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        }
509b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        tlist.addElement(t, ec);
510b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        if (U_FAILURE(ec)) {
511b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru            delete t;
512b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        }
513b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    }
514b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
515b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru RETURN:
516b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
517b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    UObjectDeleter *save = list.setDeleter(_deleteSingleID);
518b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    list.removeAllElements();
519b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
520b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    if (U_SUCCESS(ec)) {
521b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        list.setDeleter(_deleteTransliteratorTrIDPars);
522b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
523b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        while (tlist.size() > 0) {
524b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru            t = (Transliterator*) tlist.orphanElementAt(0);
525b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru            list.addElement(t, ec);
526b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru            if (U_FAILURE(ec)) {
527b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                delete t;
528b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                list.removeAllElements();
529b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                break;
530b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru            }
531b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        }
532b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    }
533b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
534b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    list.setDeleter(save);
535b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru}
536b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
537b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru/**
538b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * Parse an ID into pieces.  Take IDs of the form T, T/V, S-T,
539b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * S-T/V, or S/V-T.  If the source is missing, return a source of
540b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * ANY.
541b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * @param id the id string, in any of several forms
542b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * @return an array of 4 strings: source, target, variant, and
543b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * isSourcePresent.  If the source is not present, ANY will be
544b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * given as the source, and isSourcePresent will be NULL.  Otherwise
545b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * isSourcePresent will be non-NULL.  The target may be empty if the
546b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * id is not well-formed.  The variant may be empty.
547b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru */
548b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queruvoid TransliteratorIDParser::IDtoSTV(const UnicodeString& id,
549b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                                     UnicodeString& source,
550b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                                     UnicodeString& target,
551b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                                     UnicodeString& variant,
552b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                                     UBool& isSourcePresent) {
553103e9ffba2cba345d0078eb8b8db33249f81840aCraig Cornelius    source.setTo(ANY, 3);
554b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    target.truncate(0);
555b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    variant.truncate(0);
556b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
557b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    int32_t sep = id.indexOf(TARGET_SEP);
558b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    int32_t var = id.indexOf(VARIANT_SEP);
559b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    if (var < 0) {
560b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        var = id.length();
561b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    }
562b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    isSourcePresent = FALSE;
563b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
564b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    if (sep < 0) {
565b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        // Form: T/V or T (or /V)
566b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        id.extractBetween(0, var, target);
567b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        id.extractBetween(var, id.length(), variant);
568b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    } else if (sep < var) {
569b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        // Form: S-T/V or S-T (or -T/V or -T)
570b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        if (sep > 0) {
571b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru            id.extractBetween(0, sep, source);
572b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru            isSourcePresent = TRUE;
573b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        }
574b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        id.extractBetween(++sep, var, target);
575b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        id.extractBetween(var, id.length(), variant);
576b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    } else {
577b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        // Form: (S/V-T or /V-T)
578b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        if (var > 0) {
579b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru            id.extractBetween(0, var, source);
580b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru            isSourcePresent = TRUE;
581b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        }
582b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        id.extractBetween(var, sep++, variant);
583b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        id.extractBetween(sep, id.length(), target);
584b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    }
585b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
586b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    if (variant.length() > 0) {
587b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        variant.remove(0, 1);
588b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    }
589b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru}
590b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
591b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru/**
592b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * Given source, target, and variant strings, concatenate them into a
593b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * full ID.  If the source is empty, then "Any" will be used for the
594b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * source, so the ID will always be of the form s-t/v or s-t.
595b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru */
596b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queruvoid TransliteratorIDParser::STVtoID(const UnicodeString& source,
597b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                                     const UnicodeString& target,
598b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                                     const UnicodeString& variant,
599b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                                     UnicodeString& id) {
600b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    id = source;
601b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    if (id.length() == 0) {
602103e9ffba2cba345d0078eb8b8db33249f81840aCraig Cornelius        id.setTo(ANY, 3);
603b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    }
604b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    id.append(TARGET_SEP).append(target);
605b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    if (variant.length() != 0) {
606b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        id.append(VARIANT_SEP).append(variant);
607b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    }
608b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    // NUL-terminate the ID string for getTerminatedBuffer.
609b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    // This prevents valgrind and Purify warnings.
610b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    id.append((UChar)0);
611b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    id.truncate(id.length()-1);
612b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru}
613b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
614b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru/**
615b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * Register two targets as being inverses of one another.  For
616b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * example, calling registerSpecialInverse("NFC", "NFD", TRUE) causes
617b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * Transliterator to form the following inverse relationships:
618b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru *
619b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * <pre>NFC => NFD
620b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * Any-NFC => Any-NFD
621b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * NFD => NFC
622b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * Any-NFD => Any-NFC</pre>
623b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru *
624b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * (Without the special inverse registration, the inverse of NFC
625b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * would be NFC-Any.)  Note that NFD is shorthand for Any-NFD, but
626b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * that the presence or absence of "Any-" is preserved.
627b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru *
628b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * <p>The relationship is symmetrical; registering (a, b) is
629b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * equivalent to registering (b, a).
630b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru *
631b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * <p>The relevant IDs must still be registered separately as
632b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * factories or classes.
633b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru *
634b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * <p>Only the targets are specified.  Special inverses always
635b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * have the form Any-Target1 <=> Any-Target2.  The target should
636b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * have canonical casing (the casing desired to be produced when
637b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * an inverse is formed) and should contain no whitespace or other
638b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * extraneous characters.
639b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru *
640b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * @param target the target against which to register the inverse
641b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * @param inverseTarget the inverse of target, that is
642b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * Any-target.getInverse() => Any-inverseTarget
643b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * @param bidirectional if TRUE, register the reverse relation
644b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * as well, that is, Any-inverseTarget.getInverse() => Any-target
645b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru */
646b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queruvoid TransliteratorIDParser::registerSpecialInverse(const UnicodeString& target,
647b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                                                    const UnicodeString& inverseTarget,
648b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                                                    UBool bidirectional,
649b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                                                    UErrorCode &status) {
650f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius    umtx_initOnce(gSpecialInversesInitOnce, init, status);
651b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    if (U_FAILURE(status)) {
652b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        return;
653b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    }
654b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
655b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    // If target == inverseTarget then force bidirectional => FALSE
656b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    if (bidirectional && 0==target.caseCompare(inverseTarget, U_FOLD_CASE_DEFAULT)) {
657b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        bidirectional = FALSE;
658b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    }
659b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
660b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    Mutex lock(&LOCK);
661b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
662c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru    UnicodeString *tempus = new UnicodeString(inverseTarget);  // Used for null pointer check before usage.
663c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru    if (tempus == NULL) {
664c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru    	status = U_MEMORY_ALLOCATION_ERROR;
665c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru    	return;
666c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru    }
667c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru    SPECIAL_INVERSES->put(target, tempus, status);
668b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    if (bidirectional) {
669c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru    	tempus = new UnicodeString(target);
670c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru    	if (tempus == NULL) {
671c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru    		status = U_MEMORY_ALLOCATION_ERROR;
672c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru    		return;
673c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru    	}
674c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru        SPECIAL_INVERSES->put(inverseTarget, tempus, status);
675b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    }
676b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru}
677b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
678b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru//----------------------------------------------------------------
679b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru// Private implementation
680b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru//----------------------------------------------------------------
681b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
682b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru/**
683b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * Parse an ID into component pieces.  Take IDs of the form T,
684b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * T/V, S-T, S-T/V, or S/V-T.  If the source is missing, return a
685b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * source of ANY.
686b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * @param id the id string, in any of several forms
687b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * @param pos INPUT-OUTPUT parameter.  On input, pos is the
688b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * offset of the first character to parse in id.  On output,
689b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * pos is the offset after the last parsed character.  If the
690b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * parse failed, pos will be unchanged.
691b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * @param allowFilter2 if TRUE, a UnicodeSet pattern is allowed
692b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * at any location between specs or delimiters, and is returned
693b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * as the fifth string in the array.
694b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * @return a Specs object, or NULL if the parse failed.  If
695b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * neither source nor target was seen in the parsed id, then the
696b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * parse fails.  If allowFilter is TRUE, then the parsed filter
697b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * pattern is returned in the Specs object, otherwise the returned
698b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * filter reference is NULL.  If the parse fails for any reason
699b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * NULL is returned.
700b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru */
701b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste QueruTransliteratorIDParser::Specs*
702b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste QueruTransliteratorIDParser::parseFilterID(const UnicodeString& id, int32_t& pos,
703b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                                      UBool allowFilter) {
704b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    UnicodeString first;
705b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    UnicodeString source;
706b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    UnicodeString target;
707b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    UnicodeString variant;
708b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    UnicodeString filter;
709b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    UChar delimiter = 0;
710b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    int32_t specCount = 0;
711b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    int32_t start = pos;
712b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
713b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    // This loop parses one of the following things with each
714b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    // pass: a filter, a delimiter character (either '-' or '/'),
715b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    // or a spec (source, target, or variant).
716b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    for (;;) {
717b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        ICU_Utility::skipWhitespace(id, pos, TRUE);
718b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        if (pos == id.length()) {
719b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru            break;
720b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        }
721b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
722b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        // Parse filters
723b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        if (allowFilter && filter.length() == 0 &&
724b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru            UnicodeSet::resemblesPattern(id, pos)) {
725b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
726b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru            ParsePosition ppos(pos);
727b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru            UErrorCode ec = U_ZERO_ERROR;
728b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru            UnicodeSet set(id, ppos, USET_IGNORE_SPACE, NULL, ec);
729b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru            if (U_FAILURE(ec)) {
730b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                pos = start;
731b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                return NULL;
732b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru            }
733b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru            id.extractBetween(pos, ppos.getIndex(), filter);
734b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru            pos = ppos.getIndex();
735b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru            continue;
736b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        }
737b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
738b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        if (delimiter == 0) {
739b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru            UChar c = id.charAt(pos);
740b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru            if ((c == TARGET_SEP && target.length() == 0) ||
741b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                (c == VARIANT_SEP && variant.length() == 0)) {
742b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                delimiter = c;
743b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                ++pos;
744b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                continue;
745b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru            }
746b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        }
747b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
748b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        // We are about to try to parse a spec with no delimiter
749b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        // when we can no longer do so (we can only do so at the
750b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        // start); break.
751b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        if (delimiter == 0 && specCount > 0) {
752b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru            break;
753b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        }
754b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
755b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        UnicodeString spec = ICU_Utility::parseUnicodeIdentifier(id, pos);
756b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        if (spec.length() == 0) {
757b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru            // Note that if there was a trailing delimiter, we
758b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru            // consume it.  So Foo-, Foo/, Foo-Bar/, and Foo/Bar-
759b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru            // are legal.
760b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru            break;
761b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        }
762b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
763b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        switch (delimiter) {
764b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        case 0:
765b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru            first = spec;
766b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru            break;
767b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        case TARGET_SEP:
768b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru            target = spec;
769b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru            break;
770b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        case VARIANT_SEP:
771b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru            variant = spec;
772b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru            break;
773b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        }
774b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        ++specCount;
775b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        delimiter = 0;
776b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    }
777b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
778b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    // A spec with no prior character is either source or target,
779b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    // depending on whether an explicit "-target" was seen.
780b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    if (first.length() != 0) {
781b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        if (target.length() == 0) {
782b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru            target = first;
783b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        } else {
784b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru            source = first;
785b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        }
786b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    }
787b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
788b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    // Must have either source or target
789b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    if (source.length() == 0 && target.length() == 0) {
790b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        pos = start;
791b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        return NULL;
792b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    }
793b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
794b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    // Empty source or target defaults to ANY
795b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    UBool sawSource = TRUE;
796b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    if (source.length() == 0) {
797103e9ffba2cba345d0078eb8b8db33249f81840aCraig Cornelius        source.setTo(ANY, 3);
798b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        sawSource = FALSE;
799b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    }
800b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    if (target.length() == 0) {
801103e9ffba2cba345d0078eb8b8db33249f81840aCraig Cornelius        target.setTo(ANY, 3);
802b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    }
803b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
804b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    return new Specs(source, target, variant, sawSource, filter);
805b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru}
806b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
807b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru/**
808b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * Givens a Spec object, convert it to a SingleID object.  The
809b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * Spec object is a more unprocessed parse result.  The SingleID
810b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * object contains information about canonical and basic IDs.
811b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * @return a SingleID; never returns NULL.  Returned object always
812b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * has 'filter' field of NULL.
813b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru */
814b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste QueruTransliteratorIDParser::SingleID*
815b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste QueruTransliteratorIDParser::specsToID(const Specs* specs, int32_t dir) {
816b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    UnicodeString canonID;
817b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    UnicodeString basicID;
818b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    UnicodeString basicPrefix;
819b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    if (specs != NULL) {
820b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        UnicodeString buf;
821b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        if (dir == FORWARD) {
822b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru            if (specs->sawSource) {
823b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                buf.append(specs->source).append(TARGET_SEP);
824b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru            } else {
825b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                basicPrefix = specs->source;
826b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                basicPrefix.append(TARGET_SEP);
827b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru            }
828b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru            buf.append(specs->target);
829b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        } else {
830b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru            buf.append(specs->target).append(TARGET_SEP).append(specs->source);
831b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        }
832b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        if (specs->variant.length() != 0) {
833b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru            buf.append(VARIANT_SEP).append(specs->variant);
834b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        }
835b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        basicID = basicPrefix;
836b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        basicID.append(buf);
837b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        if (specs->filter.length() != 0) {
838b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru            buf.insert(0, specs->filter);
839b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        }
840b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        canonID = buf;
841b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    }
842b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    return new SingleID(canonID, basicID);
843b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru}
844b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
845b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru/**
846b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * Given a Specs object, return a SingleID representing the
847b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * special inverse of that ID.  If there is no special inverse
848b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * then return NULL.
849b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * @return a SingleID or NULL.  Returned object always has
850b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * 'filter' field of NULL.
851b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru */
852b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste QueruTransliteratorIDParser::SingleID*
853b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste QueruTransliteratorIDParser::specsToSpecialInverse(const Specs& specs, UErrorCode &status) {
854103e9ffba2cba345d0078eb8b8db33249f81840aCraig Cornelius    if (0!=specs.source.caseCompare(ANY, 3, U_FOLD_CASE_DEFAULT)) {
855b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        return NULL;
856b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    }
857f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius    umtx_initOnce(gSpecialInversesInitOnce, init, status);
858f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius    if (U_FAILURE(status)) {
859f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius        return NULL;
860f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius    }
861b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
862b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    UnicodeString* inverseTarget;
863b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
864b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    umtx_lock(&LOCK);
865b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    inverseTarget = (UnicodeString*) SPECIAL_INVERSES->get(specs.target);
866b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    umtx_unlock(&LOCK);
867b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
868b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    if (inverseTarget != NULL) {
869b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        // If the original ID contained "Any-" then make the
870b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        // special inverse "Any-Foo"; otherwise make it "Foo".
871b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        // So "Any-NFC" => "Any-NFD" but "NFC" => "NFD".
872b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        UnicodeString buf;
873b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        if (specs.filter.length() != 0) {
874b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru            buf.append(specs.filter);
875b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        }
876b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        if (specs.sawSource) {
877103e9ffba2cba345d0078eb8b8db33249f81840aCraig Cornelius            buf.append(ANY, 3).append(TARGET_SEP);
878b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        }
879b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        buf.append(*inverseTarget);
880b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
881103e9ffba2cba345d0078eb8b8db33249f81840aCraig Cornelius        UnicodeString basicID(TRUE, ANY, 3);
882b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        basicID.append(TARGET_SEP).append(*inverseTarget);
883b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
884b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        if (specs.variant.length() != 0) {
885b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru            buf.append(VARIANT_SEP).append(specs.variant);
886b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru            basicID.append(VARIANT_SEP).append(specs.variant);
887b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        }
888b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        return new SingleID(buf, basicID);
889b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    }
890b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    return NULL;
891b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru}
892b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
893b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru/**
894b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * Glue method to get around access problems in C++.  This would
895b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * ideally be inline but we want to avoid a circular header
896b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * dependency.
897b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru */
898b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste QueruTransliterator* TransliteratorIDParser::createBasicInstance(const UnicodeString& id, const UnicodeString* canonID) {
899b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    return Transliterator::createBasicInstance(id, canonID);
900b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru}
901b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
902b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru/**
903f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius * Initialize static memory. Called through umtx_initOnce only.
904b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru */
905b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queruvoid TransliteratorIDParser::init(UErrorCode &status) {
906f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius    U_ASSERT(SPECIAL_INVERSES == NULL);
907f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius    ucln_i18n_registerCleanup(UCLN_I18N_TRANSLITERATOR, utrans_transliterator_cleanup);
908b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
909f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius    SPECIAL_INVERSES = new Hashtable(TRUE, status);
910f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius    if (SPECIAL_INVERSES == NULL) {
911c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru    	status = U_MEMORY_ALLOCATION_ERROR;
912c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru    	return;
913c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru    }
914f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius    SPECIAL_INVERSES->setValueDeleter(uprv_deleteUObject);
915b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru}
916b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
917b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru/**
918b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * Free static memory.
919b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru */
920b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queruvoid TransliteratorIDParser::cleanup() {
921b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    if (SPECIAL_INVERSES) {
922b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        delete SPECIAL_INVERSES;
923b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        SPECIAL_INVERSES = NULL;
924b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    }
925f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius    gSpecialInversesInitOnce.reset();
926b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru}
927b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
928b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste QueruU_NAMESPACE_END
929b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
930b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru#endif /* #if !UCONFIG_NO_TRANSLITERATION */
931b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
932b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru//eof
933