1ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru/* 2ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru********************************************************************** 383a171d1a62abf406f7f44ae671823d5ec20db7dCraig Cornelius* Copyright (C) 1999-2011, International Business Machines 4ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru* Corporation and others. All Rights Reserved. 5ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru********************************************************************** 6ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru* Date Name Description 7ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru* 11/17/99 aliu Creation. 8ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru********************************************************************** 9ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru*/ 10ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 11ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru#include "unicode/utypes.h" 12ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 13ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru#if !UCONFIG_NO_TRANSLITERATION 14ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 15ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru#include "unicode/unifilt.h" 16ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru#include "unicode/uniset.h" 17ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru#include "cpdtrans.h" 18ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru#include "uvector.h" 19ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru#include "tridpars.h" 20ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru#include "cmemory.h" 21ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 22ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru// keep in sync with Transliterator 23ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru//static const UChar ID_SEP = 0x002D; /*-*/ 24ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Querustatic const UChar ID_DELIM = 0x003B; /*;*/ 25ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Querustatic const UChar NEWLINE = 10; 26ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 27ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Querustatic const UChar COLON_COLON[] = {0x3A, 0x3A, 0}; //"::" 28ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 29ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste QueruU_NAMESPACE_BEGIN 30ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 31ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queruconst UChar CompoundTransliterator::PASS_STRING[] = { 0x0025, 0x0050, 0x0061, 0x0073, 0x0073, 0 }; // "%Pass" 32ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 33ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste QueruUOBJECT_DEFINE_RTTI_IMPLEMENTATION(CompoundTransliterator) 34ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 35ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru/** 36ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru * Constructs a new compound transliterator given an array of 37ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru * transliterators. The array of transliterators may be of any 38ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru * length, including zero or one, however, useful compound 39ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru * transliterators have at least two components. 40ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru * @param transliterators array of <code>Transliterator</code> 41ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru * objects 42ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru * @param transliteratorCount The number of 43ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru * <code>Transliterator</code> objects in transliterators. 44ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru * @param filter the filter. Any character for which 45ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru * <tt>filter.contains()</tt> returns <tt>false</tt> will not be 46ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru * altered by this transliterator. If <tt>filter</tt> is 47ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru * <tt>null</tt> then no filtering is applied. 48ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru */ 49ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste QueruCompoundTransliterator::CompoundTransliterator( 50ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru Transliterator* const transliterators[], 51ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru int32_t transliteratorCount, 52ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru UnicodeFilter* adoptedFilter) : 53ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru Transliterator(joinIDs(transliterators, transliteratorCount), adoptedFilter), 54ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru trans(0), count(0), numAnonymousRBTs(0) { 55ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru setTransliterators(transliterators, transliteratorCount); 56ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru} 57ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 58ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru/** 59ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru * Splits an ID of the form "ID;ID;..." into a compound using each 60ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru * of the IDs. 61ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru * @param id of above form 62ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru * @param forward if false, does the list in reverse order, and 63ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru * takes the inverse of each ID. 64ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru */ 65ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste QueruCompoundTransliterator::CompoundTransliterator(const UnicodeString& id, 66ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru UTransDirection direction, 67ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru UnicodeFilter* adoptedFilter, 68ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru UParseError& /*parseError*/, 69ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru UErrorCode& status) : 70ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru Transliterator(id, adoptedFilter), 71ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru trans(0), numAnonymousRBTs(0) { 72ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru // TODO add code for parseError...currently unused, but 73ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru // later may be used by parsing code... 74ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru init(id, direction, TRUE, status); 75ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru} 76ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 77ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste QueruCompoundTransliterator::CompoundTransliterator(const UnicodeString& id, 78ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru UParseError& /*parseError*/, 79ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru UErrorCode& status) : 80ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru Transliterator(id, 0), // set filter to 0 here! 81ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru trans(0), numAnonymousRBTs(0) { 82ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru // TODO add code for parseError...currently unused, but 83ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru // later may be used by parsing code... 84ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru init(id, UTRANS_FORWARD, TRUE, status); 85ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru} 86ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 87ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 88ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru/** 89ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru * Private constructor for use of TransliteratorAlias 90ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru */ 91ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste QueruCompoundTransliterator::CompoundTransliterator(const UnicodeString& newID, 92ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru UVector& list, 93ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru UnicodeFilter* adoptedFilter, 94ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru int32_t anonymousRBTs, 95ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru UParseError& /*parseError*/, 96ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru UErrorCode& status) : 97ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru Transliterator(newID, adoptedFilter), 98ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru trans(0), numAnonymousRBTs(anonymousRBTs) 99ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru{ 100ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru init(list, UTRANS_FORWARD, FALSE, status); 101ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru} 102ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 103ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru/** 104ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru * Private constructor for Transliterator from a vector of 105ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru * transliterators. The caller is responsible for fixing up the 106ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru * ID. 107ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru */ 108ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste QueruCompoundTransliterator::CompoundTransliterator(UVector& list, 109ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru UParseError& /*parseError*/, 110ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru UErrorCode& status) : 11183a171d1a62abf406f7f44ae671823d5ec20db7dCraig Cornelius Transliterator(UnicodeString(), NULL), 112ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru trans(0), numAnonymousRBTs(0) 113ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru{ 114ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru // TODO add code for parseError...currently unused, but 115ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru // later may be used by parsing code... 116ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru init(list, UTRANS_FORWARD, FALSE, status); 117ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru // assume caller will fixup ID 118ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru} 119ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 120ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste QueruCompoundTransliterator::CompoundTransliterator(UVector& list, 121ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru int32_t anonymousRBTs, 122ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru UParseError& /*parseError*/, 123ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru UErrorCode& status) : 12483a171d1a62abf406f7f44ae671823d5ec20db7dCraig Cornelius Transliterator(UnicodeString(), NULL), 125ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru trans(0), numAnonymousRBTs(anonymousRBTs) 126ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru{ 127ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru init(list, UTRANS_FORWARD, FALSE, status); 128ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru} 129ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 130ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru/** 131ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru * Finish constructing a transliterator: only to be called by 132ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru * constructors. Before calling init(), set trans and filter to NULL. 133ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru * @param id the id containing ';'-separated entries 134ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru * @param direction either FORWARD or REVERSE 135ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru * @param idSplitPoint the index into id at which the 136ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru * adoptedSplitTransliterator should be inserted, if there is one, or 137ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru * -1 if there is none. 138ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru * @param adoptedSplitTransliterator a transliterator to be inserted 139ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru * before the entry at offset idSplitPoint in the id string. May be 140ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru * NULL to insert no entry. 141ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru * @param fixReverseID if TRUE, then reconstruct the ID of reverse 142ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru * entries by calling getID() of component entries. Some constructors 143ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru * do not require this because they apply a facade ID anyway. 144ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru * @param status the error code indicating success or failure 145ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru */ 146ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queruvoid CompoundTransliterator::init(const UnicodeString& id, 147ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru UTransDirection direction, 148ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru UBool fixReverseID, 149ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru UErrorCode& status) { 150ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru // assert(trans == 0); 151ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 152ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru if (U_FAILURE(status)) { 153ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru return; 154ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru } 155ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 156ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru UVector list(status); 157ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru UnicodeSet* compoundFilter = NULL; 158ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru UnicodeString regenID; 159ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru if (!TransliteratorIDParser::parseCompoundID(id, direction, 160ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru regenID, list, compoundFilter)) { 161ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru status = U_INVALID_ID; 162ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru delete compoundFilter; 163ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru return; 164ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru } 165ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 166ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru TransliteratorIDParser::instantiateList(list, status); 167ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 168ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru init(list, direction, fixReverseID, status); 169ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 170ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru if (compoundFilter != NULL) { 171ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru adoptFilter(compoundFilter); 172ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru } 173ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru} 174ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 175ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru/** 176ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru * Finish constructing a transliterator: only to be called by 177ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru * constructors. Before calling init(), set trans and filter to NULL. 178ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru * @param list a vector of transliterator objects to be adopted. It 179ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru * should NOT be empty. The list should be in declared order. That 180ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru * is, it should be in the FORWARD order; if direction is REVERSE then 181ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru * the list order will be reversed. 182ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru * @param direction either FORWARD or REVERSE 183ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru * @param fixReverseID if TRUE, then reconstruct the ID of reverse 184ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru * entries by calling getID() of component entries. Some constructors 185ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru * do not require this because they apply a facade ID anyway. 186ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru * @param status the error code indicating success or failure 187ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru */ 188ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queruvoid CompoundTransliterator::init(UVector& list, 189ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru UTransDirection direction, 190ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru UBool fixReverseID, 191ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru UErrorCode& status) { 192ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru // assert(trans == 0); 193ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 194ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru // Allocate array 195ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru if (U_SUCCESS(status)) { 196ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru count = list.size(); 197ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru trans = (Transliterator **)uprv_malloc(count * sizeof(Transliterator *)); 198ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru /* test for NULL */ 199ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru if (trans == 0) { 200ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru status = U_MEMORY_ALLOCATION_ERROR; 201ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru return; 202ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru } 203ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru } 204ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 205ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru if (U_FAILURE(status) || trans == 0) { 206ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru // assert(trans == 0); 207ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru return; 208ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru } 209ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 210ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru // Move the transliterators from the vector into an array. 211ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru // Reverse the order if necessary. 212ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru int32_t i; 213ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru for (i=0; i<count; ++i) { 214ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru int32_t j = (direction == UTRANS_FORWARD) ? i : count - 1 - i; 215ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru trans[i] = (Transliterator*) list.elementAt(j); 216ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru } 217ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 218ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru // If the direction is UTRANS_REVERSE then we may need to fix the 219ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru // ID. 220ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru if (direction == UTRANS_REVERSE && fixReverseID) { 221ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru UnicodeString newID; 222ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru for (i=0; i<count; ++i) { 223ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru if (i > 0) { 224ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru newID.append(ID_DELIM); 225ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru } 226ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru newID.append(trans[i]->getID()); 227ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru } 228ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru setID(newID); 229ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru } 230ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 231ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru computeMaximumContextLength(); 232ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru} 233ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 234ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru/** 235ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru * Return the IDs of the given list of transliterators, concatenated 236ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru * with ID_DELIM delimiting them. Equivalent to the perlish expression 237ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru * join(ID_DELIM, map($_.getID(), transliterators). 238ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru */ 239ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste QueruUnicodeString CompoundTransliterator::joinIDs(Transliterator* const transliterators[], 240ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru int32_t transCount) { 241ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru UnicodeString id; 242ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru for (int32_t i=0; i<transCount; ++i) { 243ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru if (i > 0) { 244ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru id.append(ID_DELIM); 245ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru } 246ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru id.append(transliterators[i]->getID()); 247ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru } 248ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru return id; // Return temporary 249ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru} 250ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 251ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru/** 252ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru * Copy constructor. 253ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru */ 254ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste QueruCompoundTransliterator::CompoundTransliterator(const CompoundTransliterator& t) : 255ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru Transliterator(t), trans(0), count(0), numAnonymousRBTs(-1) { 256ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru *this = t; 257ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru} 258ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 259ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru/** 260ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru * Destructor 261ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru */ 262ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste QueruCompoundTransliterator::~CompoundTransliterator() { 263ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru freeTransliterators(); 264ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru} 265ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 266ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queruvoid CompoundTransliterator::freeTransliterators(void) { 267ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru if (trans != 0) { 268ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru for (int32_t i=0; i<count; ++i) { 269ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru delete trans[i]; 270ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru } 271ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru uprv_free(trans); 272ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru } 273ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru trans = 0; 274ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru count = 0; 275ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru} 276ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 277ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru/** 278ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru * Assignment operator. 279ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru */ 280ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste QueruCompoundTransliterator& CompoundTransliterator::operator=( 28185bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho const CompoundTransliterator& t) 28285bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho{ 283ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru Transliterator::operator=(t); 28485bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho int32_t i = 0; 28585bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho UBool failed = FALSE; 28685bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho if (trans != NULL) { 28785bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho for (i=0; i<count; ++i) { 28885bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho delete trans[i]; 28985bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho trans[i] = 0; 29085bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho } 291ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru } 292ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru if (t.count > count) { 29385bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho if (trans != NULL) { 29485bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho uprv_free(trans); 29585bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho } 296ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru trans = (Transliterator **)uprv_malloc(t.count * sizeof(Transliterator *)); 297ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru } 298ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru count = t.count; 29985bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho if (trans != NULL) { 30085bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho for (i=0; i<count; ++i) { 30185bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho trans[i] = t.trans[i]->clone(); 30285bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho if (trans[i] == NULL) { 30385bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho failed = TRUE; 30485bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho break; 30585bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho } 30685bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho } 30785bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho } 30885bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho 30985bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho // if memory allocation failed delete backwards trans array 31085bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho if (failed && i > 0) { 31185bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho int32_t n; 31285bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho for (n = i-1; n >= 0; n--) { 31385bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho uprv_free(trans[n]); 31485bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho trans[n] = NULL; 31585bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho } 316ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru } 317ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru numAnonymousRBTs = t.numAnonymousRBTs; 318ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru return *this; 319ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru} 320ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 321ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru/** 322ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru * Transliterator API. 323ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru */ 324ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste QueruTransliterator* CompoundTransliterator::clone(void) const { 325ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru return new CompoundTransliterator(*this); 326ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru} 327ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 328ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru/** 329ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru * Returns the number of transliterators in this chain. 330ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru * @return number of transliterators in this chain. 331ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru */ 332ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queruint32_t CompoundTransliterator::getCount(void) const { 333ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru return count; 334ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru} 335ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 336ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru/** 337ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru * Returns the transliterator at the given index in this chain. 338ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru * @param index index into chain, from 0 to <code>getCount() - 1</code> 339ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru * @return transliterator at the given index 340ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru */ 341ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queruconst Transliterator& CompoundTransliterator::getTransliterator(int32_t index) const { 342ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru return *trans[index]; 343ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru} 344ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 345ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queruvoid CompoundTransliterator::setTransliterators(Transliterator* const transliterators[], 346ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru int32_t transCount) { 347ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru Transliterator** a = (Transliterator **)uprv_malloc(transCount * sizeof(Transliterator *)); 34885bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho if (a == NULL) { 34985bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho return; 35085bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho } 35185bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho int32_t i = 0; 35285bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho UBool failed = FALSE; 35385bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho for (i=0; i<transCount; ++i) { 354ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru a[i] = transliterators[i]->clone(); 35585bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho if (a[i] == NULL) { 35685bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho failed = TRUE; 35785bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho break; 35885bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho } 35985bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho } 36085bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho if (failed && i > 0) { 36185bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho int32_t n; 36285bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho for (n = i-1; n >= 0; n--) { 36385bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho uprv_free(a[n]); 36485bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho a[n] = NULL; 36585bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho } 36685bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho return; 367ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru } 368ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru adoptTransliterators(a, transCount); 369ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru} 370ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 371ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queruvoid CompoundTransliterator::adoptTransliterators(Transliterator* adoptedTransliterators[], 372ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru int32_t transCount) { 373ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru // First free trans[] and set count to zero. Once this is done, 374ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru // orphan the filter. Set up the new trans[]. 375ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru freeTransliterators(); 376ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru trans = adoptedTransliterators; 377ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru count = transCount; 378ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru computeMaximumContextLength(); 379ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru setID(joinIDs(trans, count)); 380ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru} 381ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 382ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru/** 383ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru * Append c to buf, unless buf is empty or buf already ends in c. 384ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru */ 385ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Querustatic void _smartAppend(UnicodeString& buf, UChar c) { 386ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru if (buf.length() != 0 && 387ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru buf.charAt(buf.length() - 1) != c) { 388ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru buf.append(c); 389ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru } 390ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru} 391ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 392ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste QueruUnicodeString& CompoundTransliterator::toRules(UnicodeString& rulesSource, 393ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru UBool escapeUnprintable) const { 394ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru // We do NOT call toRules() on our component transliterators, in 395ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru // general. If we have several rule-based transliterators, this 396ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru // yields a concatenation of the rules -- not what we want. We do 397ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru // handle compound RBT transliterators specially -- those for which 398ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru // compoundRBTIndex >= 0. For the transliterator at compoundRBTIndex, 399ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru // we do call toRules() recursively. 400ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru rulesSource.truncate(0); 401ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru if (numAnonymousRBTs >= 1 && getFilter() != NULL) { 402ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru // If we are a compound RBT and if we have a global 403ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru // filter, then emit it at the top. 404ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru UnicodeString pat; 40583a171d1a62abf406f7f44ae671823d5ec20db7dCraig Cornelius rulesSource.append(COLON_COLON, 2).append(getFilter()->toPattern(pat, escapeUnprintable)).append(ID_DELIM); 406ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru } 407ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru for (int32_t i=0; i<count; ++i) { 408ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru UnicodeString rule; 409ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 410ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru // Anonymous RuleBasedTransliterators (inline rules and 411ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru // ::BEGIN/::END blocks) are given IDs that begin with 412ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru // "%Pass": use toRules() to write all the rules to the output 413ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru // (and insert "::Null;" if we have two in a row) 41483a171d1a62abf406f7f44ae671823d5ec20db7dCraig Cornelius if (trans[i]->getID().startsWith(PASS_STRING, 5)) { 415ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru trans[i]->toRules(rule, escapeUnprintable); 41683a171d1a62abf406f7f44ae671823d5ec20db7dCraig Cornelius if (numAnonymousRBTs > 1 && i > 0 && trans[i - 1]->getID().startsWith(PASS_STRING, 5)) 417ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru rule = UNICODE_STRING_SIMPLE("::Null;") + rule; 418ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 419ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru // we also use toRules() on CompoundTransliterators (which we 420ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru // check for by looking for a semicolon in the ID)-- this gets 421ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru // the list of their child transliterators output in the right 422ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru // format 423ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru } else if (trans[i]->getID().indexOf(ID_DELIM) >= 0) { 424ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru trans[i]->toRules(rule, escapeUnprintable); 425ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 426ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru // for everything else, use Transliterator::toRules() 427ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru } else { 428ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru trans[i]->Transliterator::toRules(rule, escapeUnprintable); 429ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru } 430ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru _smartAppend(rulesSource, NEWLINE); 431ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru rulesSource.append(rule); 432ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru _smartAppend(rulesSource, ID_DELIM); 433ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru } 434ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru return rulesSource; 435ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru} 436ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 437ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru/** 438ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru * Implement Transliterator framework 439ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru */ 440ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queruvoid CompoundTransliterator::handleGetSourceSet(UnicodeSet& result) const { 441ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru UnicodeSet set; 442ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru result.clear(); 443ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru for (int32_t i=0; i<count; ++i) { 444ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru result.addAll(trans[i]->getSourceSet(set)); 445ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru // Take the example of Hiragana-Latin. This is really 446ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru // Hiragana-Katakana; Katakana-Latin. The source set of 447ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru // these two is roughly [:Hiragana:] and [:Katakana:]. 448ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru // But the source set for the entire transliterator is 449ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru // actually [:Hiragana:] ONLY -- that is, the first 450ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru // non-empty source set. 451ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 452ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru // This is a heuristic, and not 100% reliable. 453ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru if (!result.isEmpty()) { 454ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru break; 455ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru } 456ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru } 457ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru} 458ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 459ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru/** 460ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru * Override Transliterator framework 461ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru */ 462ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste QueruUnicodeSet& CompoundTransliterator::getTargetSet(UnicodeSet& result) const { 463ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru UnicodeSet set; 464ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru result.clear(); 465ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru for (int32_t i=0; i<count; ++i) { 466ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru // This is a heuristic, and not 100% reliable. 467ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru result.addAll(trans[i]->getTargetSet(set)); 468ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru } 469ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru return result; 470ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru} 471ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 472ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru/** 473ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru * Implements {@link Transliterator#handleTransliterate}. 474ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru */ 475ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queruvoid CompoundTransliterator::handleTransliterate(Replaceable& text, UTransPosition& index, 476ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru UBool incremental) const { 477ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru /* Call each transliterator with the same contextStart and 478ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru * start, but with the limit as modified 479ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru * by preceding transliterators. The start index must be 480ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru * reset for each transliterator to give each a chance to 481ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru * transliterate the text. The initial contextStart index is known 482ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru * to still point to the same place after each transliterator 483ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru * is called because each transliterator will not change the 484ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru * text between contextStart and the initial start index. 485ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru * 486ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru * IMPORTANT: After the first transliterator, each subsequent 487ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru * transliterator only gets to transliterate text committed by 488ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru * preceding transliterators; that is, the start (output 489ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru * value) of transliterator i becomes the limit (input value) 490ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru * of transliterator i+1. Finally, the overall limit is fixed 491ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru * up before we return. 492ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru * 493ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru * Assumptions we make here: 494ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru * (1) contextStart <= start <= limit <= contextLimit <= text.length() 495ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru * (2) start <= start' <= limit' ;cursor doesn't move back 496ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru * (3) start <= limit' ;text before cursor unchanged 497ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru * - start' is the value of start after calling handleKT 498ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru * - limit' is the value of limit after calling handleKT 499ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru */ 500ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 501ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru /** 502ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru * Example: 3 transliterators. This example illustrates the 503ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru * mechanics we need to implement. C, S, and L are the contextStart, 504ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru * start, and limit. gl is the globalLimit. contextLimit is 505ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru * equal to limit throughout. 506ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru * 507ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru * 1. h-u, changes hex to Unicode 508ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru * 509ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru * 4 7 a d 0 4 7 a 510ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru * abc/u0061/u => abca/u 511ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru * C S L C S L gl=f->a 512ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru * 513ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru * 2. upup, changes "x" to "XX" 514ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru * 515ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru * 4 7 a 4 7 a 516ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru * abca/u => abcAA/u 517ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru * C SL C S 518ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru * L gl=a->b 519ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru * 3. u-h, changes Unicode to hex 520ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru * 521ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru * 4 7 a 4 7 a d 0 3 522ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru * abcAA/u => abc/u0041/u0041/u 523ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru * C S L C S 524ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru * L gl=b->15 525ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru * 4. return 526ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru * 527ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru * 4 7 a d 0 3 528ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru * abc/u0041/u0041/u 529ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru * C S L 530ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru */ 531ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 532ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru if (count < 1) { 533ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru index.start = index.limit; 534ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru return; // Short circuit for empty compound transliterators 535ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru } 536ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 537ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru // compoundLimit is the limit value for the entire compound 538ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru // operation. We overwrite index.limit with the previous 539ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru // index.start. After each transliteration, we update 540ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru // compoundLimit for insertions or deletions that have happened. 541ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru int32_t compoundLimit = index.limit; 542ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 543ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru // compoundStart is the start for the entire compound 544ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru // operation. 545ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru int32_t compoundStart = index.start; 546ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 547ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru int32_t delta = 0; // delta in length 548ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 549ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru // Give each transliterator a crack at the run of characters. 550ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru // See comments at the top of the method for more detail. 551ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru for (int32_t i=0; i<count; ++i) { 552ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru index.start = compoundStart; // Reset start 553ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru int32_t limit = index.limit; 554ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 555ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru if (index.start == index.limit) { 556ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru // Short circuit for empty range 557ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru break; 558ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru } 559ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 560ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru trans[i]->filteredTransliterate(text, index, incremental); 561ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 562ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru // In a properly written transliterator, start == limit after 563ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru // handleTransliterate() returns when incremental is false. 564ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru // Catch cases where the subclass doesn't do this, and throw 565ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru // an exception. (Just pinning start to limit is a bad idea, 566ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru // because what's probably happening is that the subclass 567ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru // isn't transliterating all the way to the end, and it should 568ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru // in non-incremental mode.) 569ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru if (!incremental && index.start != index.limit) { 570ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru // We can't throw an exception, so just fudge things 571ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru index.start = index.limit; 572ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru } 573ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 574ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru // Cumulative delta for insertions/deletions 575ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru delta += index.limit - limit; 576ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 577ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru if (incremental) { 578ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru // In the incremental case, only allow subsequent 579ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru // transliterators to modify what has already been 580ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru // completely processed by prior transliterators. In the 581ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru // non-incrmental case, allow each transliterator to 582ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru // process the entire text. 583ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru index.limit = index.start; 584ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru } 585ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru } 586ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 587ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru compoundLimit += delta; 588ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 589ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru // Start is good where it is -- where the last transliterator left 590ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru // it. Limit needs to be put back where it was, modulo 591ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru // adjustments for deletions/insertions. 592ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru index.limit = compoundLimit; 593ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru} 594ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 595ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru/** 596ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru * Sets the length of the longest context required by this transliterator. 597ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru * This is <em>preceding</em> context. 598ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru */ 599ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queruvoid CompoundTransliterator::computeMaximumContextLength(void) { 600ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru int32_t max = 0; 601ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru for (int32_t i=0; i<count; ++i) { 602ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru int32_t len = trans[i]->getMaximumContextLength(); 603ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru if (len > max) { 604ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru max = len; 605ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru } 606ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru } 607ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru setMaximumContextLength(max); 608ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru} 609ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 610ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste QueruU_NAMESPACE_END 611ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 612ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru#endif /* #if !UCONFIG_NO_TRANSLITERATION */ 613ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 614ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru/* eof */ 615