1b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru/* 2b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru********************************************************************** 3f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius* Copyright (c) 2002-2014, International Business Machines Corporation 4b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru* and others. All Rights Reserved. 5b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru********************************************************************** 6b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru* Date Name Description 7b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru* 01/14/2002 aliu Creation. 8b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru********************************************************************** 9b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru*/ 10b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 11b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru#include "unicode/utypes.h" 12b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 13b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru#if !UCONFIG_NO_TRANSLITERATION 14b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 15b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru#include "tridpars.h" 16b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru#include "hash.h" 17b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru#include "mutex.h" 18f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius#include "transreg.h" 19f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius#include "uassert.h" 20b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru#include "ucln_in.h" 21b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru#include "unicode/parsepos.h" 22b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru#include "unicode/translit.h" 23b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru#include 
"unicode/uchar.h" 24b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru#include "unicode/uniset.h" 25b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru#include "unicode/unistr.h" 26b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru#include "unicode/utrans.h" 27b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru#include "util.h" 28b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru#include "uvector.h" 29b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 30b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste QueruU_NAMESPACE_BEGIN 31b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 32b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Querustatic const UChar ID_DELIM = 0x003B; // ; 33b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Querustatic const UChar TARGET_SEP = 0x002D; // - 34b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Querustatic const UChar VARIANT_SEP = 0x002F; // / 35b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Querustatic const UChar OPEN_REV = 0x0028; // ( 36b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Querustatic const UChar CLOSE_REV = 0x0029; // ) 37b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 38b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru//static const UChar EMPTY[] = {0}; // "" 39b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Querustatic const UChar ANY[] = {65,110,121,0}; // "Any" 40b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Querustatic const UChar ANY_NULL[] = {65,110,121,45,78,117,108,108,0}; // "Any-Null" 41b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 42b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Querustatic const int32_t FORWARD = UTRANS_FORWARD; 43b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Querustatic const int32_t REVERSE = UTRANS_REVERSE; 44b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 45b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste 
Querustatic Hashtable* SPECIAL_INVERSES = NULL; 46f9878a236aa0d9662d8e40cafdaf2e04cd615835ccorneliusstatic UInitOnce gSpecialInversesInitOnce = U_INITONCE_INITIALIZER; 47b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 48b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru/** 49b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * The mutex controlling access to SPECIAL_INVERSES 50b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru */ 5154dcd9b6a06071f647dac967e9e267abb9410720Craig Corneliusstatic UMutex LOCK = U_MUTEX_INITIALIZER; 52b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 53b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste QueruTransliteratorIDParser::Specs::Specs(const UnicodeString& s, const UnicodeString& t, 54b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru const UnicodeString& v, UBool sawS, 55b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru const UnicodeString& f) { 56b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru source = s; 57b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru target = t; 58b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru variant = v; 59b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru sawSource = sawS; 60b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru filter = f; 61b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru} 62b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 63b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste QueruTransliteratorIDParser::SingleID::SingleID(const UnicodeString& c, const UnicodeString& b, 64b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru const UnicodeString& f) { 65b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru canonID = c; 66b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru basicID = b; 67b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru filter = f; 68b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru} 
69b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 70b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste QueruTransliteratorIDParser::SingleID::SingleID(const UnicodeString& c, const UnicodeString& b) { 71b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru canonID = c; 72b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru basicID = b; 73b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru} 74b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 75b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste QueruTransliterator* TransliteratorIDParser::SingleID::createInstance() { 76b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru Transliterator* t; 77b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru if (basicID.length() == 0) { 78103e9ffba2cba345d0078eb8b8db33249f81840aCraig Cornelius t = createBasicInstance(UnicodeString(TRUE, ANY_NULL, 8), &canonID); 79b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } else { 80b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru t = createBasicInstance(basicID, &canonID); 81b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 82b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru if (t != NULL) { 83b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru if (filter.length() != 0) { 84b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru UErrorCode ec = U_ZERO_ERROR; 85b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru UnicodeSet *set = new UnicodeSet(filter, ec); 86b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru if (U_FAILURE(ec)) { 87b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru delete set; 88b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } else { 89b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru t->adoptFilter(set); 90b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 91b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 
92b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 93b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru return t; 94b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru} 95b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 96b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 97b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru/** 98b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * Parse a single ID, that is, an ID of the general form 99b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * "[f1] s1-t1/v1 ([f2] s2-t3/v2)", with the parenthesized element 100b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * optional, the filters optional, and the variants optional. 101b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * @param id the id to be parsed 102b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * @param pos INPUT-OUTPUT parameter. On input, the position of 103b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * the first character to parse. On output, the position after 104b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * the last character parsed. 105b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * @param dir the direction. If the direction is REVERSE then the 106b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * SingleID is constructed for the reverse direction. 
107b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * @return a SingleID object or NULL 108b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru */ 109b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste QueruTransliteratorIDParser::SingleID* 110b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste QueruTransliteratorIDParser::parseSingleID(const UnicodeString& id, int32_t& pos, 111b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru int32_t dir, UErrorCode& status) { 112b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 113b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru int32_t start = pos; 114b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 115b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru // The ID will be of the form A, A(), A(B), or (B), where 116b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru // A and B are filter IDs. 117b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru Specs* specsA = NULL; 118b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru Specs* specsB = NULL; 119b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru UBool sawParen = FALSE; 120b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 121b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru // On the first pass, look for (B) or (). If this fails, then 122b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru // on the second pass, look for A, A(B), or A(). 
123b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru for (int32_t pass=1; pass<=2; ++pass) { 124b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru if (pass == 2) { 125b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru specsA = parseFilterID(id, pos, TRUE); 126b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru if (specsA == NULL) { 127b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru pos = start; 128b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru return NULL; 129b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 130b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 131b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru if (ICU_Utility::parseChar(id, pos, OPEN_REV)) { 132b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru sawParen = TRUE; 133b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru if (!ICU_Utility::parseChar(id, pos, CLOSE_REV)) { 134b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru specsB = parseFilterID(id, pos, TRUE); 135b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru // Must close with a ')' 136b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru if (specsB == NULL || !ICU_Utility::parseChar(id, pos, CLOSE_REV)) { 137b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru delete specsA; 138b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru pos = start; 139b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru return NULL; 140b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 141b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 142b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru break; 143b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 144b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 145b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 146b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru // Assemble return results 
147b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru SingleID* single; 148b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru if (sawParen) { 149b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru if (dir == FORWARD) { 150b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru SingleID* b = specsToID(specsB, FORWARD); 151b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru single = specsToID(specsA, FORWARD); 152c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru // Null pointers check 153c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru if (b == NULL || single == NULL) { 154c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru delete b; 155c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru delete single; 156c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru status = U_MEMORY_ALLOCATION_ERROR; 157c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru return NULL; 158c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru } 159b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru single->canonID.append(OPEN_REV) 160b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru .append(b->canonID).append(CLOSE_REV); 161b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru if (specsA != NULL) { 162b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru single->filter = specsA->filter; 163b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 164b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru delete b; 165b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } else { 166b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru SingleID* a = specsToID(specsA, FORWARD); 167b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru single = specsToID(specsB, FORWARD); 168c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru // Check for null pointer. 
169c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru if (a == NULL || single == NULL) { 170c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru delete a; 171c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru delete single; 172c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru status = U_MEMORY_ALLOCATION_ERROR; 173c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru return NULL; 174c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru } 175b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru single->canonID.append(OPEN_REV) 176b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru .append(a->canonID).append(CLOSE_REV); 177b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru if (specsB != NULL) { 178b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru single->filter = specsB->filter; 179b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 180b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru delete a; 181b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 182b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } else { 183b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru // assert(specsA != NULL); 184b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru if (dir == FORWARD) { 185b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru single = specsToID(specsA, FORWARD); 186b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } else { 187b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru single = specsToSpecialInverse(*specsA, status); 188b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru if (single == NULL) { 189b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru single = specsToID(specsA, REVERSE); 190b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 191b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 192c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru // Check for NULL 
pointer 193c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru if (single == NULL) { 194c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru status = U_MEMORY_ALLOCATION_ERROR; 195c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru return NULL; 196c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru } 197b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru single->filter = specsA->filter; 198b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 199b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 200b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru delete specsA; 201b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru delete specsB; 202b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 203b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru return single; 204b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru} 205b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 206b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru/** 207b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * Parse a filter ID, that is, an ID of the general form 208b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * "[f1] s1-t1/v1", with the filters optional, and the variants optional. 209b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * @param id the id to be parsed 210b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * @param pos INPUT-OUTPUT parameter. On input, the position of 211b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * the first character to parse. On output, the position after 212b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * the last character parsed. 
213b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * @return a SingleID object or null if the parse fails 214b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru */ 215b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste QueruTransliteratorIDParser::SingleID* 216b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste QueruTransliteratorIDParser::parseFilterID(const UnicodeString& id, int32_t& pos) { 217b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 218b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru int32_t start = pos; 219b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 220b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru Specs* specs = parseFilterID(id, pos, TRUE); 221b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru if (specs == NULL) { 222b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru pos = start; 223b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru return NULL; 224b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 225b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 226b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru // Assemble return results 227b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru SingleID* single = specsToID(specs, FORWARD); 228c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru if (single != NULL) { 229c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru single->filter = specs->filter; 230c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru } 231b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru delete specs; 232b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru return single; 233b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru} 234b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 235b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru/** 236b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * Parse a global filter of the form "[f]" 
or "([f])", depending 237b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * on 'withParens'. 238b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * @param id the pattern the parse 239b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * @param pos INPUT-OUTPUT parameter. On input, the position of 240b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * the first character to parse. On output, the position after 241b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * the last character parsed. 242b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * @param dir the direction. 243b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * @param withParens INPUT-OUTPUT parameter. On entry, if 244b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * withParens is 0, then parens are disallowed. If it is 1, 245b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * then parens are requires. If it is -1, then parens are 246b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * optional, and the return result will be set to 0 or 1. 247b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * @param canonID OUTPUT parameter. The pattern for the filter 248b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * added to the canonID, either at the end, if dir is FORWARD, or 249b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * at the start, if dir is REVERSE. The pattern will be enclosed 250b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * in parentheses if appropriate, and will be suffixed with an 251b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * ID_DELIM character. May be NULL. 252b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * @return a UnicodeSet object or NULL. 
A non-NULL results 253b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * indicates a successful parse, regardless of whether the filter 254b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * applies to the given direction. The caller should discard it 255b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * if withParens != (dir == REVERSE). 256b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru */ 257b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste QueruUnicodeSet* TransliteratorIDParser::parseGlobalFilter(const UnicodeString& id, int32_t& pos, 258b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru int32_t dir, 259b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru int32_t& withParens, 260b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru UnicodeString* canonID) { 261b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru UnicodeSet* filter = NULL; 262b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru int32_t start = pos; 263b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 264b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru if (withParens == -1) { 265b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru withParens = ICU_Utility::parseChar(id, pos, OPEN_REV) ? 
1 : 0; 266b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } else if (withParens == 1) { 267b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru if (!ICU_Utility::parseChar(id, pos, OPEN_REV)) { 268b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru pos = start; 269b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru return NULL; 270b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 271b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 272b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 273b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru ICU_Utility::skipWhitespace(id, pos, TRUE); 274b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 275b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru if (UnicodeSet::resemblesPattern(id, pos)) { 276b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru ParsePosition ppos(pos); 277b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru UErrorCode ec = U_ZERO_ERROR; 278b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru filter = new UnicodeSet(id, ppos, USET_IGNORE_SPACE, NULL, ec); 279b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru /* test for NULL */ 280b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru if (filter == 0) { 281b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru pos = start; 282b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru return 0; 283b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 284b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru if (U_FAILURE(ec)) { 285b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru delete filter; 286b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru pos = start; 287b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru return NULL; 288b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 289b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 
290b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru UnicodeString pattern; 291b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru id.extractBetween(pos, ppos.getIndex(), pattern); 292b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru pos = ppos.getIndex(); 293b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 294b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru if (withParens == 1 && !ICU_Utility::parseChar(id, pos, CLOSE_REV)) { 295b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru pos = start; 296b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru return NULL; 297b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 298b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 299b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru // In the forward direction, append the pattern to the 300b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru // canonID. In the reverse, insert it at zero, and invert 301b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru // the presence of parens ("A" <-> "(A)"). 
302b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru if (canonID != NULL) { 303b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru if (dir == FORWARD) { 304b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru if (withParens == 1) { 305b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru pattern.insert(0, OPEN_REV); 306b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru pattern.append(CLOSE_REV); 307b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 308b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru canonID->append(pattern).append(ID_DELIM); 309b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } else { 310b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru if (withParens == 0) { 311b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru pattern.insert(0, OPEN_REV); 312b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru pattern.append(CLOSE_REV); 313b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 314b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru canonID->insert(0, pattern); 315b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru canonID->insert(pattern.length(), ID_DELIM); 316b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 317b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 318b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 319b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 320b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru return filter; 321b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru} 322b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 323b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste QueruU_CDECL_BEGIN 324b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Querustatic void U_CALLCONV _deleteSingleID(void* obj) { 325b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru delete (TransliteratorIDParser::SingleID*) obj; 
326b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru} 327b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 328b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Querustatic void U_CALLCONV _deleteTransliteratorTrIDPars(void* obj) { 329b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru delete (Transliterator*) obj; 330b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru} 331b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste QueruU_CDECL_END 332b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 333b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru/** 334b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * Parse a compound ID, consisting of an optional forward global 335b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * filter, a separator, one or more single IDs delimited by 336b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * separators, an an optional reverse global filter. The 337b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * separator is a semicolon. The global filters are UnicodeSet 338b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * patterns. The reverse global filter must be enclosed in 339b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * parentheses. 340b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * @param id the pattern the parse 341b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * @param dir the direction. 342b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * @param canonID OUTPUT parameter that receives the canonical ID, 343b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * consisting of canonical IDs for all elements, as returned by 344b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * parseSingleID(), separated by semicolons. Previous contents 345b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * are discarded. 
346b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * @param list OUTPUT parameter that receives a list of SingleID 347b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * objects representing the parsed IDs. Previous contents are 348b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * discarded. 349b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * @param globalFilter OUTPUT parameter that receives a pointer to 350b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * a newly created global filter for this ID in this direction, or 351b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * NULL if there is none. 352b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * @return TRUE if the parse succeeds, that is, if the entire 353b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * id is consumed without syntax error. 354b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru */ 355b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste QueruUBool TransliteratorIDParser::parseCompoundID(const UnicodeString& id, int32_t dir, 356b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru UnicodeString& canonID, 357b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru UVector& list, 358b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru UnicodeSet*& globalFilter) { 359b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru UErrorCode ec = U_ZERO_ERROR; 360b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru int32_t i; 361b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru int32_t pos = 0; 362b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru int32_t withParens = 1; 363b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru list.removeAllElements(); 364b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru UnicodeSet* filter; 365b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru globalFilter = NULL; 
366b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru canonID.truncate(0); 367b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 368b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru // Parse leading global filter, if any 369b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru withParens = 0; // parens disallowed 370b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru filter = parseGlobalFilter(id, pos, dir, withParens, &canonID); 371b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru if (filter != NULL) { 372b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru if (!ICU_Utility::parseChar(id, pos, ID_DELIM)) { 373b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru // Not a global filter; backup and resume 374b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru canonID.truncate(0); 375b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru pos = 0; 376b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 377b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru if (dir == FORWARD) { 378b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru globalFilter = filter; 379b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } else { 380b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru delete filter; 381b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 382b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru filter = NULL; 383b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 384b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 385b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru UBool sawDelimiter = TRUE; 386b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru for (;;) { 387b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru SingleID* single = parseSingleID(id, pos, dir, ec); 388b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru if (single == NULL) { 
389b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru break; 390b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 391b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru if (dir == FORWARD) { 392b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru list.addElement(single, ec); 393b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } else { 394b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru list.insertElementAt(single, 0, ec); 395b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 396b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru if (U_FAILURE(ec)) { 397b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru goto FAIL; 398b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 399b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru if (!ICU_Utility::parseChar(id, pos, ID_DELIM)) { 400b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru sawDelimiter = FALSE; 401b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru break; 402b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 403b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 404b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 405b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru if (list.size() == 0) { 406b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru goto FAIL; 407b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 408b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 409b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru // Construct canonical ID 410b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru for (i=0; i<list.size(); ++i) { 411b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru SingleID* single = (SingleID*) list.elementAt(i); 412b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru canonID.append(single->canonID); 413b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru if (i != 
(list.size()-1)) { 414b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru canonID.append(ID_DELIM); 415b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 416b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 417b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 418b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru // Parse trailing global filter, if any, and only if we saw 419b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru // a trailing delimiter after the IDs. 420b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru if (sawDelimiter) { 421b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru withParens = 1; // parens required 422b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru filter = parseGlobalFilter(id, pos, dir, withParens, &canonID); 423b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru if (filter != NULL) { 424b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru // Don't require trailing ';', but parse it if present 425b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru ICU_Utility::parseChar(id, pos, ID_DELIM); 426b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 427b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru if (dir == REVERSE) { 428b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru globalFilter = filter; 429b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } else { 430b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru delete filter; 431b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 432b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru filter = NULL; 433b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 434b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 435b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 436b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru // Trailing unparsed text is a syntax error 
437b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru ICU_Utility::skipWhitespace(id, pos, TRUE); 438b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru if (pos != id.length()) { 439b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru goto FAIL; 440b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 441b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 442b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru return TRUE; 443b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 444b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru FAIL: 445b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru UObjectDeleter *save = list.setDeleter(_deleteSingleID); 446b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru list.removeAllElements(); 447b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru list.setDeleter(save); 448b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru delete globalFilter; 449b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru globalFilter = NULL; 450b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru return FALSE; 451b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru} 452b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 453b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru/** 454b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * Convert the elements of the 'list' vector, which are SingleID 455b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * objects, into actual Transliterator objects. In the course of 456b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * this, some (or all) entries may be removed. If all entries 457b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * are removed, the NULL transliterator will be added. 
458b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * 459b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * Delete entries with empty basicIDs; these are generated by 460b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * elements like "(A)" in the forward direction, or "A()" in 461b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * the reverse. THIS MAY RESULT IN AN EMPTY VECTOR. Convert 462b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * SingleID entries to actual transliterators. 463b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * 464b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * @param list vector of SingleID objects. On exit, vector 465b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * of one or more Transliterators. 466b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * @return new value of insertIndex. The index will shift if 467b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * there are empty items, like "(Lower)", with indices less than 468b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * insertIndex. 
469b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru */ 470b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queruvoid TransliteratorIDParser::instantiateList(UVector& list, 471b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru UErrorCode& ec) { 472b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru UVector tlist(ec); 473b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru if (U_FAILURE(ec)) { 474b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru goto RETURN; 475b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 476b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru tlist.setDeleter(_deleteTransliteratorTrIDPars); 477b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 478b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru Transliterator* t; 479b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru int32_t i; 480b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru for (i=0; i<=list.size(); ++i) { // [sic]: i<=list.size() 481b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru // We run the loop too long by one, so we can 482b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru // do an insert after the last element 483b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru if (i==list.size()) { 484b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru break; 485b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 486b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 487b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru SingleID* single = (SingleID*) list.elementAt(i); 488b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru if (single->basicID.length() != 0) { 489b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru t = single->createInstance(); 490b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru if (t == NULL) { 491b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru ec = U_INVALID_ID; 
492b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru goto RETURN; 493b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 494b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru tlist.addElement(t, ec); 495b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru if (U_FAILURE(ec)) { 496b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru delete t; 497b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru goto RETURN; 498b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 499b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 500b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 501b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 502b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru // An empty list is equivalent to a NULL transliterator. 503b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru if (tlist.size() == 0) { 504103e9ffba2cba345d0078eb8b8db33249f81840aCraig Cornelius t = createBasicInstance(UnicodeString(TRUE, ANY_NULL, 8), NULL); 505b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru if (t == NULL) { 506b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru // Should never happen 507b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru ec = U_INTERNAL_TRANSLITERATOR_ERROR; 508b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 509b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru tlist.addElement(t, ec); 510b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru if (U_FAILURE(ec)) { 511b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru delete t; 512b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 513b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 514b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 515b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru RETURN: 516b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 
517b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru UObjectDeleter *save = list.setDeleter(_deleteSingleID); 518b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru list.removeAllElements(); 519b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 520b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru if (U_SUCCESS(ec)) { 521b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru list.setDeleter(_deleteTransliteratorTrIDPars); 522b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 523b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru while (tlist.size() > 0) { 524b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru t = (Transliterator*) tlist.orphanElementAt(0); 525b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru list.addElement(t, ec); 526b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru if (U_FAILURE(ec)) { 527b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru delete t; 528b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru list.removeAllElements(); 529b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru break; 530b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 531b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 532b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 533b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 534b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru list.setDeleter(save); 535b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru} 536b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 537b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru/** 538b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * Parse an ID into pieces. Take IDs of the form T, T/V, S-T, 539b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * S-T/V, or S/V-T. If the source is missing, return a source of 540b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * ANY. 
541b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * @param id the id string, in any of several forms 542b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * @return an array of 4 strings: source, target, variant, and 543b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * isSourcePresent. If the source is not present, ANY will be 544b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * given as the source, and isSourcePresent will be NULL. Otherwise 545b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * isSourcePresent will be non-NULL. The target may be empty if the 546b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * id is not well-formed. The variant may be empty. 547b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru */ 548b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queruvoid TransliteratorIDParser::IDtoSTV(const UnicodeString& id, 549b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru UnicodeString& source, 550b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru UnicodeString& target, 551b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru UnicodeString& variant, 552b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru UBool& isSourcePresent) { 553103e9ffba2cba345d0078eb8b8db33249f81840aCraig Cornelius source.setTo(ANY, 3); 554b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru target.truncate(0); 555b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru variant.truncate(0); 556b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 557b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru int32_t sep = id.indexOf(TARGET_SEP); 558b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru int32_t var = id.indexOf(VARIANT_SEP); 559b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru if (var < 0) { 560b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru var = id.length(); 
561b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 562b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru isSourcePresent = FALSE; 563b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 564b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru if (sep < 0) { 565b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru // Form: T/V or T (or /V) 566b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru id.extractBetween(0, var, target); 567b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru id.extractBetween(var, id.length(), variant); 568b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } else if (sep < var) { 569b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru // Form: S-T/V or S-T (or -T/V or -T) 570b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru if (sep > 0) { 571b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru id.extractBetween(0, sep, source); 572b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru isSourcePresent = TRUE; 573b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 574b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru id.extractBetween(++sep, var, target); 575b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru id.extractBetween(var, id.length(), variant); 576b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } else { 577b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru // Form: (S/V-T or /V-T) 578b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru if (var > 0) { 579b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru id.extractBetween(0, var, source); 580b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru isSourcePresent = TRUE; 581b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 582b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru id.extractBetween(var, sep++, variant); 583b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru id.extractBetween(sep, 
id.length(), target); 584b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 585b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 586b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru if (variant.length() > 0) { 587b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru variant.remove(0, 1); 588b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 589b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru} 590b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 591b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru/** 592b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * Given source, target, and variant strings, concatenate them into a 593b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * full ID. If the source is empty, then "Any" will be used for the 594b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * source, so the ID will always be of the form s-t/v or s-t. 595b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru */ 596b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queruvoid TransliteratorIDParser::STVtoID(const UnicodeString& source, 597b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru const UnicodeString& target, 598b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru const UnicodeString& variant, 599b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru UnicodeString& id) { 600b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru id = source; 601b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru if (id.length() == 0) { 602103e9ffba2cba345d0078eb8b8db33249f81840aCraig Cornelius id.setTo(ANY, 3); 603b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 604b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru id.append(TARGET_SEP).append(target); 605b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru if (variant.length() != 0) { 606b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 
id.append(VARIANT_SEP).append(variant); 607b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 608b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru // NUL-terminate the ID string for getTerminatedBuffer. 609b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru // This prevents valgrind and Purify warnings. 610b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru id.append((UChar)0); 611b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru id.truncate(id.length()-1); 612b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru} 613b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 614b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru/** 615b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * Register two targets as being inverses of one another. For 616b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * example, calling registerSpecialInverse("NFC", "NFD", TRUE) causes 617b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * Transliterator to form the following inverse relationships: 618b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * 619b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * <pre>NFC => NFD 620b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * Any-NFC => Any-NFD 621b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * NFD => NFC 622b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * Any-NFD => Any-NFC</pre> 623b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * 624b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * (Without the special inverse registration, the inverse of NFC 625b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * would be NFC-Any.) Note that NFD is shorthand for Any-NFD, but 626b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * that the presence or absence of "Any-" is preserved. 
627b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * 628b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * <p>The relationship is symmetrical; registering (a, b) is 629b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * equivalent to registering (b, a). 630b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * 631b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * <p>The relevant IDs must still be registered separately as 632b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * factories or classes. 633b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * 634b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * <p>Only the targets are specified. Special inverses always 635b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * have the form Any-Target1 <=> Any-Target2. The target should 636b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * have canonical casing (the casing desired to be produced when 637b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * an inverse is formed) and should contain no whitespace or other 638b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * extraneous characters. 
639b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * 640b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * @param target the target against which to register the inverse 641b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * @param inverseTarget the inverse of target, that is 642b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * Any-target.getInverse() => Any-inverseTarget 643b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * @param bidirectional if TRUE, register the reverse relation 644b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * as well, that is, Any-inverseTarget.getInverse() => Any-target 645b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru */ 646b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queruvoid TransliteratorIDParser::registerSpecialInverse(const UnicodeString& target, 647b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru const UnicodeString& inverseTarget, 648b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru UBool bidirectional, 649b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru UErrorCode &status) { 650f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius umtx_initOnce(gSpecialInversesInitOnce, init, status); 651b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru if (U_FAILURE(status)) { 652b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru return; 653b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 654b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 655b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru // If target == inverseTarget then force bidirectional => FALSE 656b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru if (bidirectional && 0==target.caseCompare(inverseTarget, U_FOLD_CASE_DEFAULT)) { 657b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru bidirectional = FALSE; 658b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 
659b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 660b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru Mutex lock(&LOCK); 661b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 662c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru UnicodeString *tempus = new UnicodeString(inverseTarget); // Used for null pointer check before usage. 663c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru if (tempus == NULL) { 664c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru status = U_MEMORY_ALLOCATION_ERROR; 665c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru return; 666c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru } 667c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru SPECIAL_INVERSES->put(target, tempus, status); 668b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru if (bidirectional) { 669c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru tempus = new UnicodeString(target); 670c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru if (tempus == NULL) { 671c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru status = U_MEMORY_ALLOCATION_ERROR; 672c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru return; 673c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru } 674c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru SPECIAL_INVERSES->put(inverseTarget, tempus, status); 675b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 676b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru} 677b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 678b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru//---------------------------------------------------------------- 679b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru// Private implementation 680b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru//---------------------------------------------------------------- 

/**
 * Parse an ID into component pieces.  Take IDs of the form T,
 * T/V, S-T, S-T/V, or S/V-T.  If the source is missing, return a
 * source of ANY.
 * @param id the id string, in any of several forms
 * @param pos INPUT-OUTPUT parameter.  On input, pos is the
 * offset of the first character to parse in id.  On output,
 * pos is the offset after the last parsed character.  If the
 * parse failed, pos will be unchanged.
 * @param allowFilter if TRUE, a UnicodeSet pattern is allowed
 * at any location between specs or delimiters, and is returned
 * in the resulting Specs object.
 * @return a Specs object, or NULL if the parse failed.  If
 * neither source nor target was seen in the parsed id, then the
 * parse fails.  If allowFilter is TRUE, then the parsed filter
 * pattern is returned in the Specs object, otherwise the returned
 * filter reference is NULL.
If the parse fails for any reason 699b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * NULL is returned. 700b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru */ 701b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste QueruTransliteratorIDParser::Specs* 702b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste QueruTransliteratorIDParser::parseFilterID(const UnicodeString& id, int32_t& pos, 703b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru UBool allowFilter) { 704b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru UnicodeString first; 705b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru UnicodeString source; 706b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru UnicodeString target; 707b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru UnicodeString variant; 708b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru UnicodeString filter; 709b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru UChar delimiter = 0; 710b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru int32_t specCount = 0; 711b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru int32_t start = pos; 712b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 713b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru // This loop parses one of the following things with each 714b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru // pass: a filter, a delimiter character (either '-' or '/'), 715b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru // or a spec (source, target, or variant). 
716b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru for (;;) { 717b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru ICU_Utility::skipWhitespace(id, pos, TRUE); 718b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru if (pos == id.length()) { 719b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru break; 720b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 721b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 722b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru // Parse filters 723b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru if (allowFilter && filter.length() == 0 && 724b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru UnicodeSet::resemblesPattern(id, pos)) { 725b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 726b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru ParsePosition ppos(pos); 727b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru UErrorCode ec = U_ZERO_ERROR; 728b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru UnicodeSet set(id, ppos, USET_IGNORE_SPACE, NULL, ec); 729b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru if (U_FAILURE(ec)) { 730b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru pos = start; 731b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru return NULL; 732b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 733b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru id.extractBetween(pos, ppos.getIndex(), filter); 734b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru pos = ppos.getIndex(); 735b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru continue; 736b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 737b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 738b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru if (delimiter == 0) { 739b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru UChar c = id.charAt(pos); 
740b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru if ((c == TARGET_SEP && target.length() == 0) || 741b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru (c == VARIANT_SEP && variant.length() == 0)) { 742b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru delimiter = c; 743b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru ++pos; 744b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru continue; 745b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 746b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 747b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 748b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru // We are about to try to parse a spec with no delimiter 749b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru // when we can no longer do so (we can only do so at the 750b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru // start); break. 751b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru if (delimiter == 0 && specCount > 0) { 752b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru break; 753b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 754b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 755b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru UnicodeString spec = ICU_Utility::parseUnicodeIdentifier(id, pos); 756b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru if (spec.length() == 0) { 757b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru // Note that if there was a trailing delimiter, we 758b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru // consume it. So Foo-, Foo/, Foo-Bar/, and Foo/Bar- 759b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru // are legal. 
760b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru break; 761b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 762b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 763b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru switch (delimiter) { 764b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru case 0: 765b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru first = spec; 766b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru break; 767b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru case TARGET_SEP: 768b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru target = spec; 769b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru break; 770b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru case VARIANT_SEP: 771b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru variant = spec; 772b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru break; 773b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 774b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru ++specCount; 775b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru delimiter = 0; 776b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 777b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 778b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru // A spec with no prior character is either source or target, 779b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru // depending on whether an explicit "-target" was seen. 
780b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru if (first.length() != 0) { 781b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru if (target.length() == 0) { 782b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru target = first; 783b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } else { 784b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru source = first; 785b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 786b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 787b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 788b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru // Must have either source or target 789b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru if (source.length() == 0 && target.length() == 0) { 790b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru pos = start; 791b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru return NULL; 792b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 793b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 794b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru // Empty source or target defaults to ANY 795b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru UBool sawSource = TRUE; 796b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru if (source.length() == 0) { 797103e9ffba2cba345d0078eb8b8db33249f81840aCraig Cornelius source.setTo(ANY, 3); 798b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru sawSource = FALSE; 799b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 800b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru if (target.length() == 0) { 801103e9ffba2cba345d0078eb8b8db33249f81840aCraig Cornelius target.setTo(ANY, 3); 802b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 803b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 804b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru return new Specs(source, 
target, variant, sawSource, filter); 805b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru} 806b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 807b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru/** 808b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * Givens a Spec object, convert it to a SingleID object. The 809b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * Spec object is a more unprocessed parse result. The SingleID 810b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * object contains information about canonical and basic IDs. 811b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * @return a SingleID; never returns NULL. Returned object always 812b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * has 'filter' field of NULL. 813b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru */ 814b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste QueruTransliteratorIDParser::SingleID* 815b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste QueruTransliteratorIDParser::specsToID(const Specs* specs, int32_t dir) { 816b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru UnicodeString canonID; 817b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru UnicodeString basicID; 818b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru UnicodeString basicPrefix; 819b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru if (specs != NULL) { 820b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru UnicodeString buf; 821b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru if (dir == FORWARD) { 822b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru if (specs->sawSource) { 823b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru buf.append(specs->source).append(TARGET_SEP); 824b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } else { 825b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru basicPrefix = specs->source; 
826b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru basicPrefix.append(TARGET_SEP); 827b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 828b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru buf.append(specs->target); 829b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } else { 830b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru buf.append(specs->target).append(TARGET_SEP).append(specs->source); 831b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 832b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru if (specs->variant.length() != 0) { 833b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru buf.append(VARIANT_SEP).append(specs->variant); 834b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 835b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru basicID = basicPrefix; 836b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru basicID.append(buf); 837b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru if (specs->filter.length() != 0) { 838b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru buf.insert(0, specs->filter); 839b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 840b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru canonID = buf; 841b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 842b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru return new SingleID(canonID, basicID); 843b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru} 844b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 845b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru/** 846b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * Given a Specs object, return a SingleID representing the 847b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * special inverse of that ID. If there is no special inverse 848b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * then return NULL. 
849b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * @return a SingleID or NULL. Returned object always has 850b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * 'filter' field of NULL. 851b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru */ 852b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste QueruTransliteratorIDParser::SingleID* 853b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste QueruTransliteratorIDParser::specsToSpecialInverse(const Specs& specs, UErrorCode &status) { 854103e9ffba2cba345d0078eb8b8db33249f81840aCraig Cornelius if (0!=specs.source.caseCompare(ANY, 3, U_FOLD_CASE_DEFAULT)) { 855b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru return NULL; 856b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 857f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius umtx_initOnce(gSpecialInversesInitOnce, init, status); 858f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius if (U_FAILURE(status)) { 859f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius return NULL; 860f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius } 861b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 862b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru UnicodeString* inverseTarget; 863b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 864b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru umtx_lock(&LOCK); 865b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru inverseTarget = (UnicodeString*) SPECIAL_INVERSES->get(specs.target); 866b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru umtx_unlock(&LOCK); 867b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 868b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru if (inverseTarget != NULL) { 869b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru // If the original ID contained "Any-" then make the 870b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru // special inverse "Any-Foo"; otherwise make it 
"Foo". 871b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru // So "Any-NFC" => "Any-NFD" but "NFC" => "NFD". 872b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru UnicodeString buf; 873b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru if (specs.filter.length() != 0) { 874b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru buf.append(specs.filter); 875b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 876b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru if (specs.sawSource) { 877103e9ffba2cba345d0078eb8b8db33249f81840aCraig Cornelius buf.append(ANY, 3).append(TARGET_SEP); 878b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 879b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru buf.append(*inverseTarget); 880b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 881103e9ffba2cba345d0078eb8b8db33249f81840aCraig Cornelius UnicodeString basicID(TRUE, ANY, 3); 882b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru basicID.append(TARGET_SEP).append(*inverseTarget); 883b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 884b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru if (specs.variant.length() != 0) { 885b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru buf.append(VARIANT_SEP).append(specs.variant); 886b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru basicID.append(VARIANT_SEP).append(specs.variant); 887b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 888b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru return new SingleID(buf, basicID); 889b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 890b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru return NULL; 891b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru} 892b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 893b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru/** 
894b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * Glue method to get around access problems in C++. This would 895b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * ideally be inline but we want to avoid a circular header 896b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * dependency. 897b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru */ 898b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste QueruTransliterator* TransliteratorIDParser::createBasicInstance(const UnicodeString& id, const UnicodeString* canonID) { 899b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru return Transliterator::createBasicInstance(id, canonID); 900b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru} 901b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 902b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru/** 903f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius * Initialize static memory. Called through umtx_initOnce only. 904b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru */ 905b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queruvoid TransliteratorIDParser::init(UErrorCode &status) { 906f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius U_ASSERT(SPECIAL_INVERSES == NULL); 907f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius ucln_i18n_registerCleanup(UCLN_I18N_TRANSLITERATOR, utrans_transliterator_cleanup); 908b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 909f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius SPECIAL_INVERSES = new Hashtable(TRUE, status); 910f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius if (SPECIAL_INVERSES == NULL) { 911c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru status = U_MEMORY_ALLOCATION_ERROR; 912c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru return; 913c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru } 914f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius 
SPECIAL_INVERSES->setValueDeleter(uprv_deleteUObject); 915b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru} 916b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 917b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru/** 918b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru * Free static memory. 919b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru */ 920b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queruvoid TransliteratorIDParser::cleanup() { 921b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru if (SPECIAL_INVERSES) { 922b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru delete SPECIAL_INVERSES; 923b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru SPECIAL_INVERSES = NULL; 924b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 925f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius gSpecialInversesInitOnce.reset(); 926b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru} 927b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 928b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste QueruU_NAMESPACE_END 929b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 930b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru#endif /* #if !UCONFIG_NO_TRANSLITERATION */ 931b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 932b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru//eof 933