/*
**********************************************************************
*   Copyright (c) 2002-2012, International Business Machines Corporation
*   and others.  All Rights Reserved.
**********************************************************************
*   Date        Name        Description
*   01/21/2002  aliu        Creation.
**********************************************************************
*/

#include "unicode/utypes.h"

// Everything from here to the matching #endif at the end of the file is
// compiled only when UCONFIG_NO_TRANSLITERATION evaluates to zero.
#if !UCONFIG_NO_TRANSLITERATION

#include "unicode/uniset.h"
#include "unicode/utf16.h"
#include "strrepl.h"
#include "rbt_data.h"
#include "util.h"

U_NAMESPACE_BEGIN

23ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste QueruUnicodeReplacer::~UnicodeReplacer() {}
24ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste QueruUOBJECT_DEFINE_RTTI_IMPLEMENTATION(StringReplacer)
25ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru
26ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru/**
27ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru * Construct a StringReplacer that sets the emits the given output
28ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru * text and sets the cursor to the given position.
29ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru * @param theOutput text that will replace input text when the
30ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru * replace() method is called.  May contain stand-in characters
31ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru * that represent nested replacers.
32ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru * @param theCursorPos cursor position that will be returned by
33ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru * the replace() method
34ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru * @param theData transliterator context object that translates
35ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru * stand-in characters to UnicodeReplacer objects
36ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru */
37ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste QueruStringReplacer::StringReplacer(const UnicodeString& theOutput,
38ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru                               int32_t theCursorPos,
39ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru                               const TransliterationRuleData* theData) {
40ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    output = theOutput;
41ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    cursorPos = theCursorPos;
42ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    hasCursor = TRUE;
43ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    data = theData;
44ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    isComplex = TRUE;
45ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru}
46ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru
47ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru/**
48ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru * Construct a StringReplacer that sets the emits the given output
49ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru * text and does not modify the cursor.
50ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru * @param theOutput text that will replace input text when the
51ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru * replace() method is called.  May contain stand-in characters
52ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru * that represent nested replacers.
53ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru * @param theData transliterator context object that translates
54ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru * stand-in characters to UnicodeReplacer objects
55ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru */
56ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste QueruStringReplacer::StringReplacer(const UnicodeString& theOutput,
57ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru                               const TransliterationRuleData* theData) {
58ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    output = theOutput;
59ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    cursorPos = 0;
60ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    hasCursor = FALSE;
61ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    data = theData;
62ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    isComplex = TRUE;
63ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru}
64ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru
65ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru/**
66ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru * Copy constructor.
67ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru */
68ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste QueruStringReplacer::StringReplacer(const StringReplacer& other) :
69ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    UnicodeFunctor(other),
70ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    UnicodeReplacer(other)
71ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru{
72ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    output = other.output;
73ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    cursorPos = other.cursorPos;
74ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    hasCursor = other.hasCursor;
75ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    data = other.data;
76ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    isComplex = other.isComplex;
77ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru}
78ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru
79ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru/**
80ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru * Destructor
81ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru */
82ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste QueruStringReplacer::~StringReplacer() {
83ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru}
84ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru
85ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru/**
86ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru * Implement UnicodeFunctor
87ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru */
88ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste QueruUnicodeFunctor* StringReplacer::clone() const {
89ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    return new StringReplacer(*this);
90ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru}
91ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru
92ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru/**
93ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru * Implement UnicodeFunctor
94ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru */
95ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste QueruUnicodeReplacer* StringReplacer::toReplacer() const {
9654dcd9b6a06071f647dac967e9e267abb9410720Craig Cornelius  return const_cast<StringReplacer *>(this);
97ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru}
98ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru
99ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru/**
100ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru * UnicodeReplacer API
101ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru */
102ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queruint32_t StringReplacer::replace(Replaceable& text,
103ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru                                int32_t start,
104ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru                                int32_t limit,
105ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru                                int32_t& cursor) {
106ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    int32_t outLen;
107ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    int32_t newStart = 0;
108ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru
109ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    // NOTE: It should be possible to _always_ run the complex
110ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    // processing code; just slower.  If not, then there is a bug
111ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    // in the complex processing code.
112ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru
113ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    // Simple (no nested replacers) Processing Code :
114ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    if (!isComplex) {
115ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru        text.handleReplaceBetween(start, limit, output);
116ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru        outLen = output.length();
117ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru
118ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru        // Setup default cursor position (for cursorPos within output)
119ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru        newStart = cursorPos;
120ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    }
121ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru
122ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    // Complex (nested replacers) Processing Code :
123ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    else {
124ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru        /* When there are segments to be copied, use the Replaceable.copy()
125ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru         * API in order to retain out-of-band data.  Copy everything to the
126ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru         * end of the string, then copy them back over the key.  This preserves
127ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru         * the integrity of indices into the key and surrounding context while
128ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru         * generating the output text.
129ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru         */
130ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru        UnicodeString buf;
131ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru        int32_t oOutput; // offset into 'output'
132ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru        isComplex = FALSE;
133ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru
134ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru        // The temporary buffer starts at tempStart, and extends
135ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru        // to destLimit.  The start of the buffer has a single
136ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru        // character from before the key.  This provides style
137ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru        // data when addition characters are filled into the
138ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru        // temporary buffer.  If there is nothing to the left, use
139ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru        // the non-character U+FFFF, which Replaceable subclasses
140ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru        // should treat specially as a "no-style character."
141ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru        // destStart points to the point after the style context
142ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru        // character, so it is tempStart+1 or tempStart+2.
143ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru        int32_t tempStart = text.length(); // start of temp buffer
144ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru        int32_t destStart = tempStart; // copy new text to here
145ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru        if (start > 0) {
146103e9ffba2cba345d0078eb8b8db33249f81840aCraig Cornelius            int32_t len = U16_LENGTH(text.char32At(start-1));
147ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru            text.copy(start-len, start, tempStart);
148ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru            destStart += len;
149ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru        } else {
150ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru            UnicodeString str((UChar) 0xFFFF);
151ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru            text.handleReplaceBetween(tempStart, tempStart, str);
152ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru            destStart++;
153ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru        }
154ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru        int32_t destLimit = destStart;
155ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru
156ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru        for (oOutput=0; oOutput<output.length(); ) {
157ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru            if (oOutput == cursorPos) {
158ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru                // Record the position of the cursor
159ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru                newStart = destLimit - destStart; // relative to start
160ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru            }
161ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru            UChar32 c = output.char32At(oOutput);
162ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru            UnicodeReplacer* r = data->lookupReplacer(c);
163ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru            if (r == NULL) {
164ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru                // Accumulate straight (non-segment) text.
165ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru                buf.append(c);
166ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru            } else {
167ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru                isComplex = TRUE;
168ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru
169ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru                // Insert any accumulated straight text.
170ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru                if (buf.length() > 0) {
171ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru                    text.handleReplaceBetween(destLimit, destLimit, buf);
172ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru                    destLimit += buf.length();
173ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru                    buf.truncate(0);
174ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru                }
175ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru
176ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru                // Delegate output generation to replacer object
177ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru                int32_t len = r->replace(text, destLimit, destLimit, cursor);
178ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru                destLimit += len;
179ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru            }
180103e9ffba2cba345d0078eb8b8db33249f81840aCraig Cornelius            oOutput += U16_LENGTH(c);
181ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru        }
182ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru        // Insert any accumulated straight text.
183ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru        if (buf.length() > 0) {
184ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru            text.handleReplaceBetween(destLimit, destLimit, buf);
185ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru            destLimit += buf.length();
186ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru        }
187ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru        if (oOutput == cursorPos) {
188ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru            // Record the position of the cursor
189ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru            newStart = destLimit - destStart; // relative to start
190ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru        }
191ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru
192ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru        outLen = destLimit - destStart;
193ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru
194ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru        // Copy new text to start, and delete it
195ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru        text.copy(destStart, destLimit, start);
196103e9ffba2cba345d0078eb8b8db33249f81840aCraig Cornelius        text.handleReplaceBetween(tempStart + outLen, destLimit + outLen, UnicodeString());
197ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru
198ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru        // Delete the old text (the key)
199103e9ffba2cba345d0078eb8b8db33249f81840aCraig Cornelius        text.handleReplaceBetween(start + outLen, limit + outLen, UnicodeString());
200ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    }
201ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru
202ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    if (hasCursor) {
203ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru        // Adjust the cursor for positions outside the key.  These
204ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru        // refer to code points rather than code units.  If cursorPos
205ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru        // is within the output string, then use newStart, which has
206ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru        // already been set above.
207ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru        if (cursorPos < 0) {
208ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru            newStart = start;
209ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru            int32_t n = cursorPos;
210ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru            // Outside the output string, cursorPos counts code points
211ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru            while (n < 0 && newStart > 0) {
212103e9ffba2cba345d0078eb8b8db33249f81840aCraig Cornelius                newStart -= U16_LENGTH(text.char32At(newStart-1));
213ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru                ++n;
214ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru            }
215ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru            newStart += n;
216ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru        } else if (cursorPos > output.length()) {
217ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru            newStart = start + outLen;
218ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru            int32_t n = cursorPos - output.length();
219ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru            // Outside the output string, cursorPos counts code points
220ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru            while (n > 0 && newStart < text.length()) {
221103e9ffba2cba345d0078eb8b8db33249f81840aCraig Cornelius                newStart += U16_LENGTH(text.char32At(newStart));
222ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru                --n;
223ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru            }
224ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru            newStart += n;
225ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru        } else {
226ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru            // Cursor is within output string.  It has been set up above
227ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru            // to be relative to start.
228ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru            newStart += start;
229ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru        }
230ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru
231ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru        cursor = newStart;
232ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    }
233ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru
234ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    return outLen;
235ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru}
236ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru
237ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru/**
238ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru * UnicodeReplacer API
239ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru */
240ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste QueruUnicodeString& StringReplacer::toReplacerPattern(UnicodeString& rule,
241ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru                                                 UBool escapeUnprintable) const {
242ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    rule.truncate(0);
243ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    UnicodeString quoteBuf;
244ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru
245ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    int32_t cursor = cursorPos;
246ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru
247ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    // Handle a cursor preceding the output
248ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    if (hasCursor && cursor < 0) {
249ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru        while (cursor++ < 0) {
250ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru            ICU_Utility::appendToRule(rule, (UChar)0x0040 /*@*/, TRUE, escapeUnprintable, quoteBuf);
251ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru        }
252ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru        // Fall through and append '|' below
253ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    }
254ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru
255ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    for (int32_t i=0; i<output.length(); ++i) {
256ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru        if (hasCursor && i == cursor) {
257ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru            ICU_Utility::appendToRule(rule, (UChar)0x007C /*|*/, TRUE, escapeUnprintable, quoteBuf);
258ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru        }
259ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru        UChar c = output.charAt(i); // Ok to use 16-bits here
260ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru
261ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru        UnicodeReplacer* r = data->lookupReplacer(c);
262ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru        if (r == NULL) {
263ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru            ICU_Utility::appendToRule(rule, c, FALSE, escapeUnprintable, quoteBuf);
264ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru        } else {
265ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru            UnicodeString buf;
266ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru            r->toReplacerPattern(buf, escapeUnprintable);
267ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru            buf.insert(0, (UChar)0x20);
268ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru            buf.append((UChar)0x20);
269ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru            ICU_Utility::appendToRule(rule, buf,
270ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru                                      TRUE, escapeUnprintable, quoteBuf);
271ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru        }
272ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    }
273ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru
274ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    // Handle a cursor after the output.  Use > rather than >= because
275ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    // if cursor == output.length() it is at the end of the output,
276ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    // which is the default position, so we need not emit it.
277ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    if (hasCursor && cursor > output.length()) {
278ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru        cursor -= output.length();
279ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru        while (cursor-- > 0) {
280ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru            ICU_Utility::appendToRule(rule, (UChar)0x0040 /*@*/, TRUE, escapeUnprintable, quoteBuf);
281ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru        }
282ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru        ICU_Utility::appendToRule(rule, (UChar)0x007C /*|*/, TRUE, escapeUnprintable, quoteBuf);
283ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    }
284ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    // Flush quoteBuf out to result
285ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    ICU_Utility::appendToRule(rule, -1,
286ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru                              TRUE, escapeUnprintable, quoteBuf);
287ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru
288ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    return rule;
289ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru}
290ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru
291ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru/**
292ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru * Implement UnicodeReplacer
293ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru */
294ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queruvoid StringReplacer::addReplacementSetTo(UnicodeSet& toUnionTo) const {
295ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    UChar32 ch;
296103e9ffba2cba345d0078eb8b8db33249f81840aCraig Cornelius    for (int32_t i=0; i<output.length(); i+=U16_LENGTH(ch)) {
297ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    ch = output.char32At(i);
298ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    UnicodeReplacer* r = data->lookupReplacer(ch);
299ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    if (r == NULL) {
300ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru        toUnionTo.add(ch);
301ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    } else {
302ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru        r->addReplacementSetTo(toUnionTo);
303ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    }
304ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    }
305ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru}
306ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru
307ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru/**
308ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru * UnicodeFunctor API
309ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru */
310ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queruvoid StringReplacer::setData(const TransliterationRuleData* d) {
311ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    data = d;
312ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    int32_t i = 0;
313ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    while (i<output.length()) {
314ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru        UChar32 c = output.char32At(i);
315ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru        UnicodeFunctor* f = data->lookup(c);
316ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru        if (f != NULL) {
317ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru            f->setData(data);
318ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru        }
319103e9ffba2cba345d0078eb8b8db33249f81840aCraig Cornelius        i += U16_LENGTH(c);
320ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    }
321ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru}
322ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru
U_NAMESPACE_END

#endif /* #if !UCONFIG_NO_TRANSLITERATION */

//eof