1ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru/* 2ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru********************************************************************** 354dcd9b6a06071f647dac967e9e267abb9410720Craig Cornelius* Copyright (c) 2002-2012, International Business Machines Corporation 4ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru* and others. All Rights Reserved. 5ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru********************************************************************** 6ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru* Date Name Description 7ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru* 01/21/2002 aliu Creation. 8ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru********************************************************************** 9ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru*/ 10ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 11ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru#include "unicode/utypes.h" 12ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 13ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru#if !UCONFIG_NO_TRANSLITERATION 14ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 15103e9ffba2cba345d0078eb8b8db33249f81840aCraig Cornelius#include "unicode/uniset.h" 16103e9ffba2cba345d0078eb8b8db33249f81840aCraig Cornelius#include "unicode/utf16.h" 17ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru#include "strrepl.h" 18ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru#include "rbt_data.h" 19ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru#include "util.h" 20ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 21ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste QueruU_NAMESPACE_BEGIN 22ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 23ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste QueruUnicodeReplacer::~UnicodeReplacer() {} 24ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste QueruUOBJECT_DEFINE_RTTI_IMPLEMENTATION(StringReplacer) 25ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 26ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru/** 27ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru * Construct a StringReplacer that sets the emits the given output 28ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru * text and sets the cursor to the given position. 29ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru * @param theOutput text that will replace input text when the 30ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru * replace() method is called. May contain stand-in characters 31ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru * that represent nested replacers. 32ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru * @param theCursorPos cursor position that will be returned by 33ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru * the replace() method 34ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru * @param theData transliterator context object that translates 35ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru * stand-in characters to UnicodeReplacer objects 36ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru */ 37ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste QueruStringReplacer::StringReplacer(const UnicodeString& theOutput, 38ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru int32_t theCursorPos, 39ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru const TransliterationRuleData* theData) { 40ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru output = theOutput; 41ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru cursorPos = theCursorPos; 42ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru hasCursor = TRUE; 43ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru data = theData; 44ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru isComplex = TRUE; 45ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru} 46ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 47ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru/** 48ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru * Construct a StringReplacer that sets the emits the given output 49ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru * text and does not modify the cursor. 50ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru * @param theOutput text that will replace input text when the 51ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru * replace() method is called. May contain stand-in characters 52ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru * that represent nested replacers. 53ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru * @param theData transliterator context object that translates 54ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru * stand-in characters to UnicodeReplacer objects 55ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru */ 56ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste QueruStringReplacer::StringReplacer(const UnicodeString& theOutput, 57ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru const TransliterationRuleData* theData) { 58ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru output = theOutput; 59ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru cursorPos = 0; 60ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru hasCursor = FALSE; 61ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru data = theData; 62ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru isComplex = TRUE; 63ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru} 64ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 65ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru/** 66ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru * Copy constructor. 67ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru */ 68ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste QueruStringReplacer::StringReplacer(const StringReplacer& other) : 69ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru UnicodeFunctor(other), 70ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru UnicodeReplacer(other) 71ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru{ 72ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru output = other.output; 73ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru cursorPos = other.cursorPos; 74ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru hasCursor = other.hasCursor; 75ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru data = other.data; 76ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru isComplex = other.isComplex; 77ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru} 78ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 79ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru/** 80ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru * Destructor 81ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru */ 82ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste QueruStringReplacer::~StringReplacer() { 83ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru} 84ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 85ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru/** 86ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru * Implement UnicodeFunctor 87ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru */ 88ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste QueruUnicodeFunctor* StringReplacer::clone() const { 89ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru return new StringReplacer(*this); 90ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru} 91ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 92ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru/** 93ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru * Implement UnicodeFunctor 94ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru */ 95ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste QueruUnicodeReplacer* StringReplacer::toReplacer() const { 9654dcd9b6a06071f647dac967e9e267abb9410720Craig Cornelius return const_cast<StringReplacer *>(this); 97ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru} 98ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 99ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru/** 100ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru * UnicodeReplacer API 101ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru */ 102ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queruint32_t StringReplacer::replace(Replaceable& text, 103ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru int32_t start, 104ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru int32_t limit, 105ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru int32_t& cursor) { 106ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru int32_t outLen; 107ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru int32_t newStart = 0; 108ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 109ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru // NOTE: It should be possible to _always_ run the complex 110ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru // processing code; just slower. If not, then there is a bug 111ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru // in the complex processing code. 112ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 113ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru // Simple (no nested replacers) Processing Code : 114ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru if (!isComplex) { 115ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru text.handleReplaceBetween(start, limit, output); 116ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru outLen = output.length(); 117ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 118ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru // Setup default cursor position (for cursorPos within output) 119ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru newStart = cursorPos; 120ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru } 121ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 122ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru // Complex (nested replacers) Processing Code : 123ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru else { 124ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru /* When there are segments to be copied, use the Replaceable.copy() 125ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru * API in order to retain out-of-band data. Copy everything to the 126ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru * end of the string, then copy them back over the key. This preserves 127ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru * the integrity of indices into the key and surrounding context while 128ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru * generating the output text. 129ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru */ 130ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru UnicodeString buf; 131ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru int32_t oOutput; // offset into 'output' 132ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru isComplex = FALSE; 133ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 134ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru // The temporary buffer starts at tempStart, and extends 135ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru // to destLimit. The start of the buffer has a single 136ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru // character from before the key. This provides style 137ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru // data when addition characters are filled into the 138ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru // temporary buffer. If there is nothing to the left, use 139ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru // the non-character U+FFFF, which Replaceable subclasses 140ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru // should treat specially as a "no-style character." 141ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru // destStart points to the point after the style context 142ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru // character, so it is tempStart+1 or tempStart+2. 143ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru int32_t tempStart = text.length(); // start of temp buffer 144ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru int32_t destStart = tempStart; // copy new text to here 145ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru if (start > 0) { 146103e9ffba2cba345d0078eb8b8db33249f81840aCraig Cornelius int32_t len = U16_LENGTH(text.char32At(start-1)); 147ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru text.copy(start-len, start, tempStart); 148ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru destStart += len; 149ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru } else { 150ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru UnicodeString str((UChar) 0xFFFF); 151ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru text.handleReplaceBetween(tempStart, tempStart, str); 152ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru destStart++; 153ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru } 154ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru int32_t destLimit = destStart; 155ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 156ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru for (oOutput=0; oOutput<output.length(); ) { 157ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru if (oOutput == cursorPos) { 158ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru // Record the position of the cursor 159ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru newStart = destLimit - destStart; // relative to start 160ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru } 161ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru UChar32 c = output.char32At(oOutput); 162ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru UnicodeReplacer* r = data->lookupReplacer(c); 163ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru if (r == NULL) { 164ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru // Accumulate straight (non-segment) text. 165ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru buf.append(c); 166ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru } else { 167ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru isComplex = TRUE; 168ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 169ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru // Insert any accumulated straight text. 170ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru if (buf.length() > 0) { 171ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru text.handleReplaceBetween(destLimit, destLimit, buf); 172ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru destLimit += buf.length(); 173ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru buf.truncate(0); 174ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru } 175ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 176ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru // Delegate output generation to replacer object 177ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru int32_t len = r->replace(text, destLimit, destLimit, cursor); 178ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru destLimit += len; 179ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru } 180103e9ffba2cba345d0078eb8b8db33249f81840aCraig Cornelius oOutput += U16_LENGTH(c); 181ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru } 182ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru // Insert any accumulated straight text. 183ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru if (buf.length() > 0) { 184ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru text.handleReplaceBetween(destLimit, destLimit, buf); 185ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru destLimit += buf.length(); 186ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru } 187ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru if (oOutput == cursorPos) { 188ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru // Record the position of the cursor 189ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru newStart = destLimit - destStart; // relative to start 190ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru } 191ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 192ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru outLen = destLimit - destStart; 193ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 194ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru // Copy new text to start, and delete it 195ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru text.copy(destStart, destLimit, start); 196103e9ffba2cba345d0078eb8b8db33249f81840aCraig Cornelius text.handleReplaceBetween(tempStart + outLen, destLimit + outLen, UnicodeString()); 197ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 198ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru // Delete the old text (the key) 199103e9ffba2cba345d0078eb8b8db33249f81840aCraig Cornelius text.handleReplaceBetween(start + outLen, limit + outLen, UnicodeString()); 200ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru } 201ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 202ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru if (hasCursor) { 203ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru // Adjust the cursor for positions outside the key. These 204ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru // refer to code points rather than code units. If cursorPos 205ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru // is within the output string, then use newStart, which has 206ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru // already been set above. 207ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru if (cursorPos < 0) { 208ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru newStart = start; 209ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru int32_t n = cursorPos; 210ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru // Outside the output string, cursorPos counts code points 211ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru while (n < 0 && newStart > 0) { 212103e9ffba2cba345d0078eb8b8db33249f81840aCraig Cornelius newStart -= U16_LENGTH(text.char32At(newStart-1)); 213ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru ++n; 214ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru } 215ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru newStart += n; 216ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru } else if (cursorPos > output.length()) { 217ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru newStart = start + outLen; 218ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru int32_t n = cursorPos - output.length(); 219ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru // Outside the output string, cursorPos counts code points 220ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru while (n > 0 && newStart < text.length()) { 221103e9ffba2cba345d0078eb8b8db33249f81840aCraig Cornelius newStart += U16_LENGTH(text.char32At(newStart)); 222ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru --n; 223ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru } 224ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru newStart += n; 225ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru } else { 226ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru // Cursor is within output string. It has been set up above 227ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru // to be relative to start. 228ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru newStart += start; 229ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru } 230ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 231ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru cursor = newStart; 232ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru } 233ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 234ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru return outLen; 235ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru} 236ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 237ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru/** 238ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru * UnicodeReplacer API 239ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru */ 240ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste QueruUnicodeString& StringReplacer::toReplacerPattern(UnicodeString& rule, 241ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru UBool escapeUnprintable) const { 242ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru rule.truncate(0); 243ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru UnicodeString quoteBuf; 244ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 245ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru int32_t cursor = cursorPos; 246ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 247ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru // Handle a cursor preceding the output 248ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru if (hasCursor && cursor < 0) { 249ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru while (cursor++ < 0) { 250ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru ICU_Utility::appendToRule(rule, (UChar)0x0040 /*@*/, TRUE, escapeUnprintable, quoteBuf); 251ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru } 252ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru // Fall through and append '|' below 253ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru } 254ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 255ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru for (int32_t i=0; i<output.length(); ++i) { 256ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru if (hasCursor && i == cursor) { 257ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru ICU_Utility::appendToRule(rule, (UChar)0x007C /*|*/, TRUE, escapeUnprintable, quoteBuf); 258ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru } 259ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru UChar c = output.charAt(i); // Ok to use 16-bits here 260ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 261ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru UnicodeReplacer* r = data->lookupReplacer(c); 262ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru if (r == NULL) { 263ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru ICU_Utility::appendToRule(rule, c, FALSE, escapeUnprintable, quoteBuf); 264ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru } else { 265ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru UnicodeString buf; 266ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru r->toReplacerPattern(buf, escapeUnprintable); 267ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru buf.insert(0, (UChar)0x20); 268ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru buf.append((UChar)0x20); 269ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru ICU_Utility::appendToRule(rule, buf, 270ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru TRUE, escapeUnprintable, quoteBuf); 271ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru } 272ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru } 273ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 274ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru // Handle a cursor after the output. Use > rather than >= because 275ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru // if cursor == output.length() it is at the end of the output, 276ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru // which is the default position, so we need not emit it. 277ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru if (hasCursor && cursor > output.length()) { 278ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru cursor -= output.length(); 279ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru while (cursor-- > 0) { 280ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru ICU_Utility::appendToRule(rule, (UChar)0x0040 /*@*/, TRUE, escapeUnprintable, quoteBuf); 281ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru } 282ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru ICU_Utility::appendToRule(rule, (UChar)0x007C /*|*/, TRUE, escapeUnprintable, quoteBuf); 283ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru } 284ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru // Flush quoteBuf out to result 285ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru ICU_Utility::appendToRule(rule, -1, 286ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru TRUE, escapeUnprintable, quoteBuf); 287ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 288ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru return rule; 289ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru} 290ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 291ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru/** 292ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru * Implement UnicodeReplacer 293ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru */ 294ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queruvoid StringReplacer::addReplacementSetTo(UnicodeSet& toUnionTo) const { 295ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru UChar32 ch; 296103e9ffba2cba345d0078eb8b8db33249f81840aCraig Cornelius for (int32_t i=0; i<output.length(); i+=U16_LENGTH(ch)) { 297ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru ch = output.char32At(i); 298ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru UnicodeReplacer* r = data->lookupReplacer(ch); 299ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru if (r == NULL) { 300ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru toUnionTo.add(ch); 301ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru } else { 302ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru r->addReplacementSetTo(toUnionTo); 303ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru } 304ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru } 305ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru} 306ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 307ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru/** 308ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru * UnicodeFunctor API 309ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru */ 310ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queruvoid StringReplacer::setData(const TransliterationRuleData* d) { 311ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru data = d; 312ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru int32_t i = 0; 313ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru while (i<output.length()) { 314ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru UChar32 c = output.char32At(i); 315ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru UnicodeFunctor* f = data->lookup(c); 316ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru if (f != NULL) { 317ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru f->setData(data); 318ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru } 319103e9ffba2cba345d0078eb8b8db33249f81840aCraig Cornelius i += U16_LENGTH(c); 320ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru } 321ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru} 322ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 323ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste QueruU_NAMESPACE_END 324ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 325ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru#endif /* #if !UCONFIG_NO_TRANSLITERATION */ 326ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 327ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru//eof 328