1b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru/*
2b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru**********************************************************************
354dcd9b6a06071f647dac967e9e267abb9410720Craig Cornelius*   Copyright (c) 2002-2012, International Business Machines Corporation
4b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru*   and others.  All Rights Reserved.
5b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru**********************************************************************
6b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru*   Date        Name        Description
7b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru*   01/14/2002  aliu        Creation.
8b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru**********************************************************************
9b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru*/
10b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
11b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru#include "unicode/utypes.h"
12b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
13b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru#if !UCONFIG_NO_TRANSLITERATION
14b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
15b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru#include "tridpars.h"
16b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru#include "hash.h"
17b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru#include "mutex.h"
18b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru#include "ucln_in.h"
19b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru#include "unicode/parsepos.h"
20b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru#include "unicode/translit.h"
21b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru#include "unicode/uchar.h"
22b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru#include "unicode/uniset.h"
23b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru#include "unicode/unistr.h"
24b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru#include "unicode/utrans.h"
25b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru#include "util.h"
26b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru#include "uvector.h"
27b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
28b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste QueruU_NAMESPACE_BEGIN
29b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
30b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Querustatic const UChar ID_DELIM    = 0x003B; // ;
31b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Querustatic const UChar TARGET_SEP  = 0x002D; // -
32b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Querustatic const UChar VARIANT_SEP = 0x002F; // /
33b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Querustatic const UChar OPEN_REV    = 0x0028; // (
34b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Querustatic const UChar CLOSE_REV   = 0x0029; // )
35b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
36b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru//static const UChar EMPTY[]     = {0}; // ""
37b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Querustatic const UChar ANY[]       = {65,110,121,0}; // "Any"
38b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Querustatic const UChar ANY_NULL[]  = {65,110,121,45,78,117,108,108,0}; // "Any-Null"
39b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
40b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Querustatic const int32_t FORWARD = UTRANS_FORWARD;
41b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Querustatic const int32_t REVERSE = UTRANS_REVERSE;
42b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
43b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Querustatic Hashtable* SPECIAL_INVERSES = NULL;
44b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
45b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru/**
46b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * The mutex controlling access to SPECIAL_INVERSES
47b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru */
4854dcd9b6a06071f647dac967e9e267abb9410720Craig Corneliusstatic UMutex LOCK = U_MUTEX_INITIALIZER;
49b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
50b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste QueruTransliteratorIDParser::Specs::Specs(const UnicodeString& s, const UnicodeString& t,
51b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                                     const UnicodeString& v, UBool sawS,
52b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                                     const UnicodeString& f) {
53b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    source = s;
54b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    target = t;
55b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    variant = v;
56b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    sawSource = sawS;
57b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    filter = f;
58b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru}
59b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
60b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste QueruTransliteratorIDParser::SingleID::SingleID(const UnicodeString& c, const UnicodeString& b,
61b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                                           const UnicodeString& f) {
62b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    canonID = c;
63b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    basicID = b;
64b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    filter = f;
65b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru}
66b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
67b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste QueruTransliteratorIDParser::SingleID::SingleID(const UnicodeString& c, const UnicodeString& b) {
68b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    canonID = c;
69b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    basicID = b;
70b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru}
71b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
72b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste QueruTransliterator* TransliteratorIDParser::SingleID::createInstance() {
73b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    Transliterator* t;
74b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    if (basicID.length() == 0) {
75103e9ffba2cba345d0078eb8b8db33249f81840aCraig Cornelius        t = createBasicInstance(UnicodeString(TRUE, ANY_NULL, 8), &canonID);
76b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    } else {
77b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        t = createBasicInstance(basicID, &canonID);
78b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    }
79b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    if (t != NULL) {
80b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        if (filter.length() != 0) {
81b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru            UErrorCode ec = U_ZERO_ERROR;
82b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru            UnicodeSet *set = new UnicodeSet(filter, ec);
83b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru            if (U_FAILURE(ec)) {
84b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                delete set;
85b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru            } else {
86b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                t->adoptFilter(set);
87b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru            }
88b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        }
89b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    }
90b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    return t;
91b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru}
92b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
93b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
94b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru/**
95b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * Parse a single ID, that is, an ID of the general form
96b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * "[f1] s1-t1/v1 ([f2] s2-t3/v2)", with the parenthesized element
97b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * optional, the filters optional, and the variants optional.
98b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * @param id the id to be parsed
99b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * @param pos INPUT-OUTPUT parameter.  On input, the position of
100b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * the first character to parse.  On output, the position after
101b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * the last character parsed.
102b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * @param dir the direction.  If the direction is REVERSE then the
103b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * SingleID is constructed for the reverse direction.
104b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * @return a SingleID object or NULL
105b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru */
106b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste QueruTransliteratorIDParser::SingleID*
107b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste QueruTransliteratorIDParser::parseSingleID(const UnicodeString& id, int32_t& pos,
108b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                                      int32_t dir, UErrorCode& status) {
109b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
110b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    int32_t start = pos;
111b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
112b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    // The ID will be of the form A, A(), A(B), or (B), where
113b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    // A and B are filter IDs.
114b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    Specs* specsA = NULL;
115b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    Specs* specsB = NULL;
116b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    UBool sawParen = FALSE;
117b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
118b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    // On the first pass, look for (B) or ().  If this fails, then
119b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    // on the second pass, look for A, A(B), or A().
120b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    for (int32_t pass=1; pass<=2; ++pass) {
121b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        if (pass == 2) {
122b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru            specsA = parseFilterID(id, pos, TRUE);
123b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru            if (specsA == NULL) {
124b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                pos = start;
125b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                return NULL;
126b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru            }
127b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        }
128b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        if (ICU_Utility::parseChar(id, pos, OPEN_REV)) {
129b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru            sawParen = TRUE;
130b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru            if (!ICU_Utility::parseChar(id, pos, CLOSE_REV)) {
131b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                specsB = parseFilterID(id, pos, TRUE);
132b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                // Must close with a ')'
133b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                if (specsB == NULL || !ICU_Utility::parseChar(id, pos, CLOSE_REV)) {
134b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                    delete specsA;
135b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                    pos = start;
136b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                    return NULL;
137b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                }
138b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru            }
139b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru            break;
140b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        }
141b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    }
142b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
143b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    // Assemble return results
144b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    SingleID* single;
145b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    if (sawParen) {
146b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        if (dir == FORWARD) {
147b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru            SingleID* b = specsToID(specsB, FORWARD);
148b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru            single = specsToID(specsA, FORWARD);
149c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru            // Null pointers check
150c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru            if (b == NULL || single == NULL) {
151c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru            	delete b;
152c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru            	delete single;
153c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru            	status = U_MEMORY_ALLOCATION_ERROR;
154c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru            	return NULL;
155c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru            }
156b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru            single->canonID.append(OPEN_REV)
157b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                .append(b->canonID).append(CLOSE_REV);
158b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru            if (specsA != NULL) {
159b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                single->filter = specsA->filter;
160b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru            }
161b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru            delete b;
162b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        } else {
163b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru            SingleID* a = specsToID(specsA, FORWARD);
164b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru            single = specsToID(specsB, FORWARD);
165c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru            // Check for null pointer.
166c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru            if (a == NULL || single == NULL) {
167c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru            	delete a;
168c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru            	delete single;
169c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru            	status = U_MEMORY_ALLOCATION_ERROR;
170c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru            	return NULL;
171c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru            }
172b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru            single->canonID.append(OPEN_REV)
173b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                .append(a->canonID).append(CLOSE_REV);
174b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru            if (specsB != NULL) {
175b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                single->filter = specsB->filter;
176b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru            }
177b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru            delete a;
178b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        }
179b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    } else {
180b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        // assert(specsA != NULL);
181b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        if (dir == FORWARD) {
182b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru            single = specsToID(specsA, FORWARD);
183b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        } else {
184b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru            single = specsToSpecialInverse(*specsA, status);
185b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru            if (single == NULL) {
186b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                single = specsToID(specsA, REVERSE);
187b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru            }
188b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        }
189c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru        // Check for NULL pointer
190c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru        if (single == NULL) {
191c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru        	status = U_MEMORY_ALLOCATION_ERROR;
192c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru        	return NULL;
193c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru        }
194b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        single->filter = specsA->filter;
195b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    }
196b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
197b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    delete specsA;
198b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    delete specsB;
199b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
200b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    return single;
201b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru}
202b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
203b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru/**
204b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * Parse a filter ID, that is, an ID of the general form
205b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * "[f1] s1-t1/v1", with the filters optional, and the variants optional.
206b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * @param id the id to be parsed
207b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * @param pos INPUT-OUTPUT parameter.  On input, the position of
208b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * the first character to parse.  On output, the position after
209b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * the last character parsed.
210b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * @return a SingleID object or null if the parse fails
211b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru */
212b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste QueruTransliteratorIDParser::SingleID*
213b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste QueruTransliteratorIDParser::parseFilterID(const UnicodeString& id, int32_t& pos) {
214b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
215b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    int32_t start = pos;
216b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
217b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    Specs* specs = parseFilterID(id, pos, TRUE);
218b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    if (specs == NULL) {
219b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        pos = start;
220b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        return NULL;
221b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    }
222b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
223b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    // Assemble return results
224b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    SingleID* single = specsToID(specs, FORWARD);
225c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru    if (single != NULL) {
226c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru        single->filter = specs->filter;
227c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru    }
228b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    delete specs;
229b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    return single;
230b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru}
231b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
232b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru/**
233b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * Parse a global filter of the form "[f]" or "([f])", depending
234b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * on 'withParens'.
235b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * @param id the pattern the parse
236b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * @param pos INPUT-OUTPUT parameter.  On input, the position of
237b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * the first character to parse.  On output, the position after
238b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * the last character parsed.
239b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * @param dir the direction.
240b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * @param withParens INPUT-OUTPUT parameter.  On entry, if
241b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * withParens is 0, then parens are disallowed.  If it is 1,
242b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * then parens are requires.  If it is -1, then parens are
243b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * optional, and the return result will be set to 0 or 1.
244b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * @param canonID OUTPUT parameter.  The pattern for the filter
245b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * added to the canonID, either at the end, if dir is FORWARD, or
246b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * at the start, if dir is REVERSE.  The pattern will be enclosed
247b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * in parentheses if appropriate, and will be suffixed with an
248b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * ID_DELIM character.  May be NULL.
249b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * @return a UnicodeSet object or NULL.  A non-NULL results
250b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * indicates a successful parse, regardless of whether the filter
251b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * applies to the given direction.  The caller should discard it
252b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * if withParens != (dir == REVERSE).
253b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru */
254b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste QueruUnicodeSet* TransliteratorIDParser::parseGlobalFilter(const UnicodeString& id, int32_t& pos,
255b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                                                      int32_t dir,
256b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                                                      int32_t& withParens,
257b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                                                      UnicodeString* canonID) {
258b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    UnicodeSet* filter = NULL;
259b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    int32_t start = pos;
260b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
261b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    if (withParens == -1) {
262b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        withParens = ICU_Utility::parseChar(id, pos, OPEN_REV) ? 1 : 0;
263b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    } else if (withParens == 1) {
264b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        if (!ICU_Utility::parseChar(id, pos, OPEN_REV)) {
265b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru            pos = start;
266b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru            return NULL;
267b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        }
268b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    }
269b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
270b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    ICU_Utility::skipWhitespace(id, pos, TRUE);
271b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
272b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    if (UnicodeSet::resemblesPattern(id, pos)) {
273b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        ParsePosition ppos(pos);
274b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        UErrorCode ec = U_ZERO_ERROR;
275b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        filter = new UnicodeSet(id, ppos, USET_IGNORE_SPACE, NULL, ec);
276b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        /* test for NULL */
277b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        if (filter == 0) {
278b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru            pos = start;
279b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru            return 0;
280b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        }
281b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        if (U_FAILURE(ec)) {
282b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru            delete filter;
283b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru            pos = start;
284b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru            return NULL;
285b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        }
286b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
287b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        UnicodeString pattern;
288b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        id.extractBetween(pos, ppos.getIndex(), pattern);
289b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        pos = ppos.getIndex();
290b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
291b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        if (withParens == 1 && !ICU_Utility::parseChar(id, pos, CLOSE_REV)) {
292b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru            pos = start;
293b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru            return NULL;
294b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        }
295b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
296b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        // In the forward direction, append the pattern to the
297b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        // canonID.  In the reverse, insert it at zero, and invert
298b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        // the presence of parens ("A" <-> "(A)").
299b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        if (canonID != NULL) {
300b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru            if (dir == FORWARD) {
301b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                if (withParens == 1) {
302b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                    pattern.insert(0, OPEN_REV);
303b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                    pattern.append(CLOSE_REV);
304b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                }
305b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                canonID->append(pattern).append(ID_DELIM);
306b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru            } else {
307b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                if (withParens == 0) {
308b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                    pattern.insert(0, OPEN_REV);
309b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                    pattern.append(CLOSE_REV);
310b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                }
311b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                canonID->insert(0, pattern);
312b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                canonID->insert(pattern.length(), ID_DELIM);
313b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru            }
314b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        }
315b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    }
316b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
317b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    return filter;
318b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru}
319b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
320b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste QueruU_CDECL_BEGIN
321b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Querustatic void U_CALLCONV _deleteSingleID(void* obj) {
322b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    delete (TransliteratorIDParser::SingleID*) obj;
323b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru}
324b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
325b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Querustatic void U_CALLCONV _deleteTransliteratorTrIDPars(void* obj) {
326b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    delete (Transliterator*) obj;
327b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru}
328b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste QueruU_CDECL_END
329b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
330b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru/**
331b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * Parse a compound ID, consisting of an optional forward global
332b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * filter, a separator, one or more single IDs delimited by
333b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * separators, an an optional reverse global filter.  The
334b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * separator is a semicolon.  The global filters are UnicodeSet
335b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * patterns.  The reverse global filter must be enclosed in
336b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * parentheses.
337b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * @param id the pattern the parse
338b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * @param dir the direction.
339b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * @param canonID OUTPUT parameter that receives the canonical ID,
340b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * consisting of canonical IDs for all elements, as returned by
341b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * parseSingleID(), separated by semicolons.  Previous contents
342b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * are discarded.
343b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * @param list OUTPUT parameter that receives a list of SingleID
344b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * objects representing the parsed IDs.  Previous contents are
345b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * discarded.
346b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * @param globalFilter OUTPUT parameter that receives a pointer to
347b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * a newly created global filter for this ID in this direction, or
348b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * NULL if there is none.
349b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * @return TRUE if the parse succeeds, that is, if the entire
350b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * id is consumed without syntax error.
351b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru */
352b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste QueruUBool TransliteratorIDParser::parseCompoundID(const UnicodeString& id, int32_t dir,
353b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                                              UnicodeString& canonID,
354b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                                              UVector& list,
355b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                                              UnicodeSet*& globalFilter) {
356b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    UErrorCode ec = U_ZERO_ERROR;
357b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    int32_t i;
358b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    int32_t pos = 0;
359b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    int32_t withParens = 1;
360b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    list.removeAllElements();
361b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    UnicodeSet* filter;
362b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    globalFilter = NULL;
363b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    canonID.truncate(0);
364b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
365b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    // Parse leading global filter, if any
366b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    withParens = 0; // parens disallowed
367b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    filter = parseGlobalFilter(id, pos, dir, withParens, &canonID);
368b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    if (filter != NULL) {
369b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        if (!ICU_Utility::parseChar(id, pos, ID_DELIM)) {
370b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru            // Not a global filter; backup and resume
371b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru            canonID.truncate(0);
372b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru            pos = 0;
373b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        }
374b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        if (dir == FORWARD) {
375b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru            globalFilter = filter;
376b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        } else {
377b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru            delete filter;
378b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        }
379b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        filter = NULL;
380b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    }
381b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
382b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    UBool sawDelimiter = TRUE;
383b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    for (;;) {
384b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        SingleID* single = parseSingleID(id, pos, dir, ec);
385b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        if (single == NULL) {
386b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru            break;
387b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        }
388b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        if (dir == FORWARD) {
389b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru            list.addElement(single, ec);
390b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        } else {
391b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru            list.insertElementAt(single, 0, ec);
392b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        }
393b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        if (U_FAILURE(ec)) {
394b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru            goto FAIL;
395b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        }
396b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        if (!ICU_Utility::parseChar(id, pos, ID_DELIM)) {
397b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru            sawDelimiter = FALSE;
398b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru            break;
399b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        }
400b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    }
401b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
402b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    if (list.size() == 0) {
403b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        goto FAIL;
404b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    }
405b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
406b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    // Construct canonical ID
407b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    for (i=0; i<list.size(); ++i) {
408b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        SingleID* single = (SingleID*) list.elementAt(i);
409b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        canonID.append(single->canonID);
410b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        if (i != (list.size()-1)) {
411b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru            canonID.append(ID_DELIM);
412b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        }
413b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    }
414b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
415b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    // Parse trailing global filter, if any, and only if we saw
416b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    // a trailing delimiter after the IDs.
417b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    if (sawDelimiter) {
418b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        withParens = 1; // parens required
419b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        filter = parseGlobalFilter(id, pos, dir, withParens, &canonID);
420b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        if (filter != NULL) {
421b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru            // Don't require trailing ';', but parse it if present
422b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru            ICU_Utility::parseChar(id, pos, ID_DELIM);
423b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
424b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru            if (dir == REVERSE) {
425b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                globalFilter = filter;
426b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru            } else {
427b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                delete filter;
428b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru            }
429b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru            filter = NULL;
430b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        }
431b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    }
432b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
433b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    // Trailing unparsed text is a syntax error
434b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    ICU_Utility::skipWhitespace(id, pos, TRUE);
435b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    if (pos != id.length()) {
436b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        goto FAIL;
437b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    }
438b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
439b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    return TRUE;
440b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
441b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru FAIL:
442b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    UObjectDeleter *save = list.setDeleter(_deleteSingleID);
443b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    list.removeAllElements();
444b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    list.setDeleter(save);
445b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    delete globalFilter;
446b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    globalFilter = NULL;
447b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    return FALSE;
448b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru}
449b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
450b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru/**
451b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * Convert the elements of the 'list' vector, which are SingleID
452b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * objects, into actual Transliterator objects.  In the course of
453b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * this, some (or all) entries may be removed.  If all entries
454b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * are removed, the NULL transliterator will be added.
455b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru *
456b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * Delete entries with empty basicIDs; these are generated by
457b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * elements like "(A)" in the forward direction, or "A()" in
458b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * the reverse.  THIS MAY RESULT IN AN EMPTY VECTOR.  Convert
459b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * SingleID entries to actual transliterators.
460b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru *
461b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * @param list vector of SingleID objects.  On exit, vector
462b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * of one or more Transliterators.
463b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * @return new value of insertIndex.  The index will shift if
464b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * there are empty items, like "(Lower)", with indices less than
465b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * insertIndex.
466b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru */
467b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queruvoid TransliteratorIDParser::instantiateList(UVector& list,
468b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                                                UErrorCode& ec) {
469b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    UVector tlist(ec);
470b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    if (U_FAILURE(ec)) {
471b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        goto RETURN;
472b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    }
473b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    tlist.setDeleter(_deleteTransliteratorTrIDPars);
474b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
475b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    Transliterator* t;
476b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    int32_t i;
477b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    for (i=0; i<=list.size(); ++i) { // [sic]: i<=list.size()
478b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        // We run the loop too long by one, so we can
479b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        // do an insert after the last element
480b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        if (i==list.size()) {
481b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru            break;
482b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        }
483b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
484b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        SingleID* single = (SingleID*) list.elementAt(i);
485b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        if (single->basicID.length() != 0) {
486b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru            t = single->createInstance();
487b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru            if (t == NULL) {
488b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                ec = U_INVALID_ID;
489b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                goto RETURN;
490b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru            }
491b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru            tlist.addElement(t, ec);
492b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru            if (U_FAILURE(ec)) {
493b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                delete t;
494b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                goto RETURN;
495b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru            }
496b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        }
497b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    }
498b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
499b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    // An empty list is equivalent to a NULL transliterator.
500b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    if (tlist.size() == 0) {
501103e9ffba2cba345d0078eb8b8db33249f81840aCraig Cornelius        t = createBasicInstance(UnicodeString(TRUE, ANY_NULL, 8), NULL);
502b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        if (t == NULL) {
503b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru            // Should never happen
504b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru            ec = U_INTERNAL_TRANSLITERATOR_ERROR;
505b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        }
506b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        tlist.addElement(t, ec);
507b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        if (U_FAILURE(ec)) {
508b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru            delete t;
509b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        }
510b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    }
511b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
512b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru RETURN:
513b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
514b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    UObjectDeleter *save = list.setDeleter(_deleteSingleID);
515b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    list.removeAllElements();
516b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
517b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    if (U_SUCCESS(ec)) {
518b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        list.setDeleter(_deleteTransliteratorTrIDPars);
519b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
520b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        while (tlist.size() > 0) {
521b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru            t = (Transliterator*) tlist.orphanElementAt(0);
522b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru            list.addElement(t, ec);
523b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru            if (U_FAILURE(ec)) {
524b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                delete t;
525b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                list.removeAllElements();
526b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                break;
527b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru            }
528b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        }
529b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    }
530b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
531b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    list.setDeleter(save);
532b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru}
533b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
534b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru/**
535b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * Parse an ID into pieces.  Take IDs of the form T, T/V, S-T,
536b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * S-T/V, or S/V-T.  If the source is missing, return a source of
537b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * ANY.
538b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * @param id the id string, in any of several forms
539b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * @return an array of 4 strings: source, target, variant, and
540b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * isSourcePresent.  If the source is not present, ANY will be
541b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * given as the source, and isSourcePresent will be NULL.  Otherwise
542b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * isSourcePresent will be non-NULL.  The target may be empty if the
543b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * id is not well-formed.  The variant may be empty.
544b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru */
545b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queruvoid TransliteratorIDParser::IDtoSTV(const UnicodeString& id,
546b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                                     UnicodeString& source,
547b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                                     UnicodeString& target,
548b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                                     UnicodeString& variant,
549b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                                     UBool& isSourcePresent) {
550103e9ffba2cba345d0078eb8b8db33249f81840aCraig Cornelius    source.setTo(ANY, 3);
551b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    target.truncate(0);
552b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    variant.truncate(0);
553b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
554b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    int32_t sep = id.indexOf(TARGET_SEP);
555b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    int32_t var = id.indexOf(VARIANT_SEP);
556b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    if (var < 0) {
557b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        var = id.length();
558b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    }
559b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    isSourcePresent = FALSE;
560b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
561b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    if (sep < 0) {
562b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        // Form: T/V or T (or /V)
563b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        id.extractBetween(0, var, target);
564b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        id.extractBetween(var, id.length(), variant);
565b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    } else if (sep < var) {
566b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        // Form: S-T/V or S-T (or -T/V or -T)
567b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        if (sep > 0) {
568b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru            id.extractBetween(0, sep, source);
569b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru            isSourcePresent = TRUE;
570b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        }
571b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        id.extractBetween(++sep, var, target);
572b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        id.extractBetween(var, id.length(), variant);
573b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    } else {
574b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        // Form: (S/V-T or /V-T)
575b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        if (var > 0) {
576b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru            id.extractBetween(0, var, source);
577b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru            isSourcePresent = TRUE;
578b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        }
579b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        id.extractBetween(var, sep++, variant);
580b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        id.extractBetween(sep, id.length(), target);
581b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    }
582b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
583b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    if (variant.length() > 0) {
584b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        variant.remove(0, 1);
585b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    }
586b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru}
587b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
588b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru/**
589b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * Given source, target, and variant strings, concatenate them into a
590b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * full ID.  If the source is empty, then "Any" will be used for the
591b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * source, so the ID will always be of the form s-t/v or s-t.
592b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru */
593b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queruvoid TransliteratorIDParser::STVtoID(const UnicodeString& source,
594b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                                     const UnicodeString& target,
595b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                                     const UnicodeString& variant,
596b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                                     UnicodeString& id) {
597b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    id = source;
598b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    if (id.length() == 0) {
599103e9ffba2cba345d0078eb8b8db33249f81840aCraig Cornelius        id.setTo(ANY, 3);
600b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    }
601b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    id.append(TARGET_SEP).append(target);
602b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    if (variant.length() != 0) {
603b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        id.append(VARIANT_SEP).append(variant);
604b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    }
605b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    // NUL-terminate the ID string for getTerminatedBuffer.
606b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    // This prevents valgrind and Purify warnings.
607b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    id.append((UChar)0);
608b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    id.truncate(id.length()-1);
609b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru}
610b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
611b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru/**
612b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * Register two targets as being inverses of one another.  For
613b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * example, calling registerSpecialInverse("NFC", "NFD", TRUE) causes
614b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * Transliterator to form the following inverse relationships:
615b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru *
616b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * <pre>NFC => NFD
617b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * Any-NFC => Any-NFD
618b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * NFD => NFC
619b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * Any-NFD => Any-NFC</pre>
620b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru *
621b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * (Without the special inverse registration, the inverse of NFC
622b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * would be NFC-Any.)  Note that NFD is shorthand for Any-NFD, but
623b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * that the presence or absence of "Any-" is preserved.
624b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru *
625b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * <p>The relationship is symmetrical; registering (a, b) is
626b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * equivalent to registering (b, a).
627b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru *
628b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * <p>The relevant IDs must still be registered separately as
629b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * factories or classes.
630b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru *
631b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * <p>Only the targets are specified.  Special inverses always
632b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * have the form Any-Target1 <=> Any-Target2.  The target should
633b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * have canonical casing (the casing desired to be produced when
634b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * an inverse is formed) and should contain no whitespace or other
635b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * extraneous characters.
636b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru *
637b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * @param target the target against which to register the inverse
638b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * @param inverseTarget the inverse of target, that is
639b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * Any-target.getInverse() => Any-inverseTarget
640b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * @param bidirectional if TRUE, register the reverse relation
641b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * as well, that is, Any-inverseTarget.getInverse() => Any-target
642b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru */
643b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queruvoid TransliteratorIDParser::registerSpecialInverse(const UnicodeString& target,
644b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                                                    const UnicodeString& inverseTarget,
645b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                                                    UBool bidirectional,
646b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                                                    UErrorCode &status) {
647b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    init(status);
648b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    if (U_FAILURE(status)) {
649b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        return;
650b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    }
651b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
652b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    // If target == inverseTarget then force bidirectional => FALSE
653b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    if (bidirectional && 0==target.caseCompare(inverseTarget, U_FOLD_CASE_DEFAULT)) {
654b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        bidirectional = FALSE;
655b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    }
656b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
657b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    Mutex lock(&LOCK);
658b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
659c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru    UnicodeString *tempus = new UnicodeString(inverseTarget);  // Used for null pointer check before usage.
660c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru    if (tempus == NULL) {
661c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru    	status = U_MEMORY_ALLOCATION_ERROR;
662c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru    	return;
663c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru    }
664c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru    SPECIAL_INVERSES->put(target, tempus, status);
665b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    if (bidirectional) {
666c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru    	tempus = new UnicodeString(target);
667c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru    	if (tempus == NULL) {
668c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru    		status = U_MEMORY_ALLOCATION_ERROR;
669c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru    		return;
670c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru    	}
671c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru        SPECIAL_INVERSES->put(inverseTarget, tempus, status);
672b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    }
673b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru}
674b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
675b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru//----------------------------------------------------------------
676b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru// Private implementation
677b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru//----------------------------------------------------------------
678b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
679b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru/**
680b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * Parse an ID into component pieces.  Take IDs of the form T,
681b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * T/V, S-T, S-T/V, or S/V-T.  If the source is missing, return a
682b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * source of ANY.
683b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * @param id the id string, in any of several forms
684b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * @param pos INPUT-OUTPUT parameter.  On input, pos is the
685b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * offset of the first character to parse in id.  On output,
686b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * pos is the offset after the last parsed character.  If the
687b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * parse failed, pos will be unchanged.
688b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * @param allowFilter2 if TRUE, a UnicodeSet pattern is allowed
689b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * at any location between specs or delimiters, and is returned
690b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * as the fifth string in the array.
691b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * @return a Specs object, or NULL if the parse failed.  If
692b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * neither source nor target was seen in the parsed id, then the
693b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * parse fails.  If allowFilter is TRUE, then the parsed filter
694b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * pattern is returned in the Specs object, otherwise the returned
695b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * filter reference is NULL.  If the parse fails for any reason
696b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * NULL is returned.
697b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru */
698b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste QueruTransliteratorIDParser::Specs*
699b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste QueruTransliteratorIDParser::parseFilterID(const UnicodeString& id, int32_t& pos,
700b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                                      UBool allowFilter) {
701b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    UnicodeString first;
702b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    UnicodeString source;
703b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    UnicodeString target;
704b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    UnicodeString variant;
705b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    UnicodeString filter;
706b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    UChar delimiter = 0;
707b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    int32_t specCount = 0;
708b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    int32_t start = pos;
709b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
710b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    // This loop parses one of the following things with each
711b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    // pass: a filter, a delimiter character (either '-' or '/'),
712b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    // or a spec (source, target, or variant).
713b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    for (;;) {
714b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        ICU_Utility::skipWhitespace(id, pos, TRUE);
715b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        if (pos == id.length()) {
716b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru            break;
717b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        }
718b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
719b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        // Parse filters
720b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        if (allowFilter && filter.length() == 0 &&
721b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru            UnicodeSet::resemblesPattern(id, pos)) {
722b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
723b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru            ParsePosition ppos(pos);
724b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru            UErrorCode ec = U_ZERO_ERROR;
725b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru            UnicodeSet set(id, ppos, USET_IGNORE_SPACE, NULL, ec);
726b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru            if (U_FAILURE(ec)) {
727b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                pos = start;
728b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                return NULL;
729b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru            }
730b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru            id.extractBetween(pos, ppos.getIndex(), filter);
731b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru            pos = ppos.getIndex();
732b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru            continue;
733b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        }
734b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
735b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        if (delimiter == 0) {
736b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru            UChar c = id.charAt(pos);
737b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru            if ((c == TARGET_SEP && target.length() == 0) ||
738b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                (c == VARIANT_SEP && variant.length() == 0)) {
739b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                delimiter = c;
740b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                ++pos;
741b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                continue;
742b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru            }
743b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        }
744b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
745b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        // We are about to try to parse a spec with no delimiter
746b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        // when we can no longer do so (we can only do so at the
747b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        // start); break.
748b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        if (delimiter == 0 && specCount > 0) {
749b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru            break;
750b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        }
751b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
752b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        UnicodeString spec = ICU_Utility::parseUnicodeIdentifier(id, pos);
753b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        if (spec.length() == 0) {
754b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru            // Note that if there was a trailing delimiter, we
755b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru            // consume it.  So Foo-, Foo/, Foo-Bar/, and Foo/Bar-
756b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru            // are legal.
757b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru            break;
758b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        }
759b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
760b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        switch (delimiter) {
761b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        case 0:
762b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru            first = spec;
763b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru            break;
764b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        case TARGET_SEP:
765b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru            target = spec;
766b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru            break;
767b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        case VARIANT_SEP:
768b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru            variant = spec;
769b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru            break;
770b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        }
771b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        ++specCount;
772b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        delimiter = 0;
773b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    }
774b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
775b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    // A spec with no prior character is either source or target,
776b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    // depending on whether an explicit "-target" was seen.
777b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    if (first.length() != 0) {
778b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        if (target.length() == 0) {
779b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru            target = first;
780b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        } else {
781b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru            source = first;
782b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        }
783b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    }
784b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
785b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    // Must have either source or target
786b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    if (source.length() == 0 && target.length() == 0) {
787b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        pos = start;
788b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        return NULL;
789b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    }
790b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
791b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    // Empty source or target defaults to ANY
792b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    UBool sawSource = TRUE;
793b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    if (source.length() == 0) {
794103e9ffba2cba345d0078eb8b8db33249f81840aCraig Cornelius        source.setTo(ANY, 3);
795b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        sawSource = FALSE;
796b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    }
797b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    if (target.length() == 0) {
798103e9ffba2cba345d0078eb8b8db33249f81840aCraig Cornelius        target.setTo(ANY, 3);
799b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    }
800b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
801b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    return new Specs(source, target, variant, sawSource, filter);
802b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru}
803b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
804b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru/**
805b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * Givens a Spec object, convert it to a SingleID object.  The
806b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * Spec object is a more unprocessed parse result.  The SingleID
807b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * object contains information about canonical and basic IDs.
808b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * @return a SingleID; never returns NULL.  Returned object always
809b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * has 'filter' field of NULL.
810b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru */
811b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste QueruTransliteratorIDParser::SingleID*
812b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste QueruTransliteratorIDParser::specsToID(const Specs* specs, int32_t dir) {
813b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    UnicodeString canonID;
814b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    UnicodeString basicID;
815b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    UnicodeString basicPrefix;
816b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    if (specs != NULL) {
817b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        UnicodeString buf;
818b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        if (dir == FORWARD) {
819b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru            if (specs->sawSource) {
820b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                buf.append(specs->source).append(TARGET_SEP);
821b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru            } else {
822b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                basicPrefix = specs->source;
823b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                basicPrefix.append(TARGET_SEP);
824b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru            }
825b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru            buf.append(specs->target);
826b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        } else {
827b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru            buf.append(specs->target).append(TARGET_SEP).append(specs->source);
828b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        }
829b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        if (specs->variant.length() != 0) {
830b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru            buf.append(VARIANT_SEP).append(specs->variant);
831b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        }
832b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        basicID = basicPrefix;
833b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        basicID.append(buf);
834b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        if (specs->filter.length() != 0) {
835b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru            buf.insert(0, specs->filter);
836b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        }
837b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        canonID = buf;
838b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    }
839b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    return new SingleID(canonID, basicID);
840b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru}
841b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
842b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru/**
843b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * Given a Specs object, return a SingleID representing the
844b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * special inverse of that ID.  If there is no special inverse
845b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * then return NULL.
846b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * @return a SingleID or NULL.  Returned object always has
847b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * 'filter' field of NULL.
848b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru */
849b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste QueruTransliteratorIDParser::SingleID*
850b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste QueruTransliteratorIDParser::specsToSpecialInverse(const Specs& specs, UErrorCode &status) {
851103e9ffba2cba345d0078eb8b8db33249f81840aCraig Cornelius    if (0!=specs.source.caseCompare(ANY, 3, U_FOLD_CASE_DEFAULT)) {
852b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        return NULL;
853b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    }
854b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    init(status);
855b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
856b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    UnicodeString* inverseTarget;
857b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
858b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    umtx_lock(&LOCK);
859b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    inverseTarget = (UnicodeString*) SPECIAL_INVERSES->get(specs.target);
860b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    umtx_unlock(&LOCK);
861b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
862b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    if (inverseTarget != NULL) {
863b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        // If the original ID contained "Any-" then make the
864b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        // special inverse "Any-Foo"; otherwise make it "Foo".
865b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        // So "Any-NFC" => "Any-NFD" but "NFC" => "NFD".
866b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        UnicodeString buf;
867b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        if (specs.filter.length() != 0) {
868b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru            buf.append(specs.filter);
869b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        }
870b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        if (specs.sawSource) {
871103e9ffba2cba345d0078eb8b8db33249f81840aCraig Cornelius            buf.append(ANY, 3).append(TARGET_SEP);
872b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        }
873b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        buf.append(*inverseTarget);
874b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
875103e9ffba2cba345d0078eb8b8db33249f81840aCraig Cornelius        UnicodeString basicID(TRUE, ANY, 3);
876b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        basicID.append(TARGET_SEP).append(*inverseTarget);
877b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
878b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        if (specs.variant.length() != 0) {
879b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru            buf.append(VARIANT_SEP).append(specs.variant);
880b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru            basicID.append(VARIANT_SEP).append(specs.variant);
881b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        }
882b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        return new SingleID(buf, basicID);
883b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    }
884b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    return NULL;
885b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru}
886b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
887b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru/**
888b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * Glue method to get around access problems in C++.  This would
889b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * ideally be inline but we want to avoid a circular header
890b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * dependency.
891b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru */
892b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste QueruTransliterator* TransliteratorIDParser::createBasicInstance(const UnicodeString& id, const UnicodeString* canonID) {
893b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    return Transliterator::createBasicInstance(id, canonID);
894b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru}
895b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
896b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru/**
897b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * Initialize static memory.
898b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru */
899b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queruvoid TransliteratorIDParser::init(UErrorCode &status) {
900b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    if (SPECIAL_INVERSES != NULL) {
901b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        return;
902b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    }
903b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
904b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    Hashtable* special_inverses = new Hashtable(TRUE, status);
905c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru    // Null pointer check
906c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru    if (special_inverses == NULL) {
907c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru    	status = U_MEMORY_ALLOCATION_ERROR;
908c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru    	return;
909c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru    }
910103e9ffba2cba345d0078eb8b8db33249f81840aCraig Cornelius    special_inverses->setValueDeleter(uprv_deleteUObject);
911b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
912b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    umtx_lock(&LOCK);
913b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    if (SPECIAL_INVERSES == NULL) {
914b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        SPECIAL_INVERSES = special_inverses;
915b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        special_inverses = NULL;
916b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    }
917b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    umtx_unlock(&LOCK);
918b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    delete special_inverses; /*null instance*/
919b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
920b0ac937921a2c196d8b9da665135bf6ba01a1ccfJean-Baptiste Queru    ucln_i18n_registerCleanup(UCLN_I18N_TRANSLITERATOR, utrans_transliterator_cleanup);
921b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru}
922b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
923b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru/**
924b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * Free static memory.
925b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru */
926b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queruvoid TransliteratorIDParser::cleanup() {
927b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    if (SPECIAL_INVERSES) {
928b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        delete SPECIAL_INVERSES;
929b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        SPECIAL_INVERSES = NULL;
930b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    }
931b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru}
932b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
933b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste QueruU_NAMESPACE_END
934b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
935b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru#endif /* #if !UCONFIG_NO_TRANSLITERATION */
936b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
937b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru//eof
938