1// © 2016 and later: Unicode, Inc. and others.
2// License & terms of use: http://www.unicode.org/copyright.html#License
3/*
4**********************************************************************
5*   Copyright (c) 2002-2007, International Business Machines Corporation
6*   and others.  All Rights Reserved.
7**********************************************************************
8*   Date        Name        Description
9*   01/14/2002  aliu        Creation.
10**********************************************************************
11*/
12
13package com.ibm.icu.text;
14import com.ibm.icu.impl.Utility;
15
16/**
17 * A replacer that produces static text as its output.  The text may
18 * contain transliterator stand-in characters that represent nested
19 * UnicodeReplacer objects, making it possible to encode a tree of
20 * replacers in a StringReplacer.  A StringReplacer that contains such
21 * stand-ins is called a <em>complex</em> StringReplacer.  A complex
22 * StringReplacer has a slower processing loop than a non-complex one.
23 * @author Alan Liu
24 */
25class StringReplacer implements UnicodeReplacer {
26
27    /**
28     * Output text, possibly containing stand-in characters that
29     * represent nested UnicodeReplacers.
30     */
31    private String output;
32
33    /**
34     * Cursor position.  Value is ignored if hasCursor is false.
35     */
36    private int cursorPos;
37
38    /**
39     * True if this object outputs a cursor position.
40     */
41    private boolean hasCursor;
42
43    /**
44     * A complex object contains nested replacers and requires more
45     * complex processing.  StringReplacers are initially assumed to
46     * be complex.  If no nested replacers are seen during processing,
47     * then isComplex is set to false, and future replacements are
48     * short circuited for better performance.
49     */
50    private boolean isComplex;
51
52    /**
53     * Object that translates stand-in characters in 'output' to
54     * UnicodeReplacer objects.
55     */
56    private final RuleBasedTransliterator.Data data;
57
58    /**
59     * Construct a StringReplacer that sets the emits the given output
60     * text and sets the cursor to the given position.
61     * @param theOutput text that will replace input text when the
62     * replace() method is called.  May contain stand-in characters
63     * that represent nested replacers.
64     * @param theCursorPos cursor position that will be returned by
65     * the replace() method
66     * @param theData transliterator context object that translates
67     * stand-in characters to UnicodeReplacer objects
68     */
69    public StringReplacer(String theOutput,
70                          int theCursorPos,
71                          RuleBasedTransliterator.Data theData) {
72        output = theOutput;
73        cursorPos = theCursorPos;
74        hasCursor = true;
75        data = theData;
76        isComplex = true;
77    }
78
79    /**
80     * Construct a StringReplacer that sets the emits the given output
81     * text and does not modify the cursor.
82     * @param theOutput text that will replace input text when the
83     * replace() method is called.  May contain stand-in characters
84     * that represent nested replacers.
85     * @param theData transliterator context object that translates
86     * stand-in characters to UnicodeReplacer objects
87     */
88    public StringReplacer(String theOutput,
89                          RuleBasedTransliterator.Data theData) {
90        output = theOutput;
91        cursorPos = 0;
92        hasCursor = false;
93        data = theData;
94        isComplex = true;
95    }
96
97//=    public static UnicodeReplacer valueOf(String output,
98//=                                          int cursorPos,
99//=                                          RuleBasedTransliterator.Data data) {
100//=        if (output.length() == 1) {
101//=            char c = output.charAt(0);
102//=            UnicodeReplacer r = data.lookupReplacer(c);
103//=            if (r != null) {
104//=                return r;
105//=            }
106//=        }
107//=        return new StringReplacer(output, cursorPos, data);
108//=    }
109
110    /**
111     * UnicodeReplacer API
112     */
113    @Override
114    public int replace(Replaceable text,
115                       int start,
116                       int limit,
117                       int[] cursor) {
118        int outLen;
119        int newStart = 0;
120
121        // NOTE: It should be possible to _always_ run the complex
122        // processing code; just slower.  If not, then there is a bug
123        // in the complex processing code.
124
125        // Simple (no nested replacers) Processing Code :
126        if (!isComplex) {
127            text.replace(start, limit, output);
128            outLen = output.length();
129
130            // Setup default cursor position (for cursorPos within output)
131            newStart = cursorPos;
132        }
133
134        // Complex (nested replacers) Processing Code :
135        else {
136            /* When there are segments to be copied, use the Replaceable.copy()
137             * API in order to retain out-of-band data.  Copy everything to the
138             * end of the string, then copy them back over the key.  This preserves
139             * the integrity of indices into the key and surrounding context while
140             * generating the output text.
141             */
142            StringBuffer buf = new StringBuffer();
143            int oOutput; // offset into 'output'
144            isComplex = false;
145
146            // The temporary buffer starts at tempStart, and extends
147            // to destLimit + tempExtra.  The start of the buffer has a single
148            // character from before the key.  This provides style
149            // data when addition characters are filled into the
150            // temporary buffer.  If there is nothing to the left, use
151            // the non-character U+FFFF, which Replaceable subclasses
152            // should treat specially as a "no-style character."
153            // destStart points to the point after the style context
154            // character, so it is tempStart+1 or tempStart+2.
155            int tempStart = text.length(); // start of temp buffer
156            int destStart = tempStart; // copy new text to here
157            if (start > 0) {
158                int len = UTF16.getCharCount(text.char32At(start-1));
159                text.copy(start-len, start, tempStart);
160                destStart += len;
161            } else {
162                text.replace(tempStart, tempStart, "\uFFFF");
163                destStart++;
164            }
165            int destLimit = destStart;
166            int tempExtra = 0; // temp chars after destLimit
167
168            for (oOutput=0; oOutput<output.length(); ) {
169                if (oOutput == cursorPos) {
170                    // Record the position of the cursor
171                    newStart = buf.length() + destLimit - destStart; // relative to start
172                    // the buf.length() was inserted for bug 5789
173                    // the problem is that if we are accumulating into a buffer (when r == null below)
174                    // then the actual length of the text at that point needs to add the buf length.
175                    // there was an alternative suggested in #5789, but that looks like it won't work
176                    // if we have accumulated some stuff in the dest part AND have a non-zero buffer.
177                }
178                int c = UTF16.charAt(output, oOutput);
179
180                // When we are at the last position copy the right style
181                // context character into the temporary buffer.  We don't
182                // do this before because it will provide an incorrect
183                // right context for previous replace() operations.
184                int nextIndex = oOutput + UTF16.getCharCount(c);
185                if (nextIndex == output.length()) {
186                    tempExtra = UTF16.getCharCount(text.char32At(limit));
187                    text.copy(limit, limit+tempExtra, destLimit);
188                }
189
190                UnicodeReplacer r = data.lookupReplacer(c);
191                if (r == null) {
192                    // Accumulate straight (non-segment) text.
193                    UTF16.append(buf, c);
194                } else {
195                    isComplex = true;
196
197                    // Insert any accumulated straight text.
198                    if (buf.length() > 0) {
199                        text.replace(destLimit, destLimit, buf.toString());
200                        destLimit += buf.length();
201                        buf.setLength(0);
202                    }
203
204                    // Delegate output generation to replacer object
205                    int len = r.replace(text, destLimit, destLimit, cursor);
206                    destLimit += len;
207                }
208                oOutput = nextIndex;
209            }
210            // Insert any accumulated straight text.
211            if (buf.length() > 0) {
212                text.replace(destLimit, destLimit, buf.toString());
213                destLimit += buf.length();
214            }
215            if (oOutput == cursorPos) {
216                // Record the position of the cursor
217                newStart = destLimit - destStart; // relative to start
218            }
219
220            outLen = destLimit - destStart;
221
222            // Copy new text to start, and delete it
223            text.copy(destStart, destLimit, start);
224            text.replace(tempStart + outLen, destLimit + tempExtra + outLen, "");
225
226            // Delete the old text (the key)
227            text.replace(start + outLen, limit + outLen, "");
228        }
229
230        if (hasCursor) {
231            // Adjust the cursor for positions outside the key.  These
232            // refer to code points rather than code units.  If cursorPos
233            // is within the output string, then use newStart, which has
234            // already been set above.
235            if (cursorPos < 0) {
236                newStart = start;
237                int n = cursorPos;
238                // Outside the output string, cursorPos counts code points
239                while (n < 0 && newStart > 0) {
240                    newStart -= UTF16.getCharCount(text.char32At(newStart-1));
241                    ++n;
242                }
243                newStart += n;
244            } else if (cursorPos > output.length()) {
245                newStart = start + outLen;
246                int n = cursorPos - output.length();
247                // Outside the output string, cursorPos counts code points
248                while (n > 0 && newStart < text.length()) {
249                    newStart += UTF16.getCharCount(text.char32At(newStart));
250                    --n;
251                }
252                newStart += n;
253            } else {
254                // Cursor is within output string.  It has been set up above
255                // to be relative to start.
256                newStart += start;
257            }
258
259            cursor[0] = newStart;
260        }
261
262        return outLen;
263    }
264
265    /**
266     * UnicodeReplacer API
267     */
268    @Override
269    public String toReplacerPattern(boolean escapeUnprintable) {
270        StringBuffer rule = new StringBuffer();
271        StringBuffer quoteBuf = new StringBuffer();
272
273        int cursor = cursorPos;
274
275        // Handle a cursor preceding the output
276        if (hasCursor && cursor < 0) {
277            while (cursor++ < 0) {
278                Utility.appendToRule(rule, '@', true, escapeUnprintable, quoteBuf);
279            }
280            // Fall through and append '|' below
281        }
282
283        for (int i=0; i<output.length(); ++i) {
284            if (hasCursor && i == cursor) {
285                Utility.appendToRule(rule, '|', true, escapeUnprintable, quoteBuf);
286            }
287            char c = output.charAt(i); // Ok to use 16-bits here
288
289            UnicodeReplacer r = data.lookupReplacer(c);
290            if (r == null) {
291                Utility.appendToRule(rule, c, false, escapeUnprintable, quoteBuf);
292            } else {
293                StringBuffer buf = new StringBuffer(" ");
294                buf.append(r.toReplacerPattern(escapeUnprintable));
295                buf.append(' ');
296                Utility.appendToRule(rule, buf.toString(),
297                                     true, escapeUnprintable, quoteBuf);
298            }
299        }
300
301        // Handle a cursor after the output.  Use > rather than >= because
302        // if cursor == output.length() it is at the end of the output,
303        // which is the default position, so we need not emit it.
304        if (hasCursor && cursor > output.length()) {
305            cursor -= output.length();
306            while (cursor-- > 0) {
307                Utility.appendToRule(rule, '@', true, escapeUnprintable, quoteBuf);
308            }
309            Utility.appendToRule(rule, '|', true, escapeUnprintable, quoteBuf);
310        }
311        // Flush quoteBuf out to result
312        Utility.appendToRule(rule, -1,
313                             true, escapeUnprintable, quoteBuf);
314
315        return rule.toString();
316    }
317
318    /**
319     * Union the set of all characters that may output by this object
320     * into the given set.
321     * @param toUnionTo the set into which to union the output characters
322     */
323    @Override
324    public void addReplacementSetTo(UnicodeSet toUnionTo) {
325        int ch;
326        for (int i=0; i<output.length(); i+=UTF16.getCharCount(ch)) {
327            ch = UTF16.charAt(output, i);
328            UnicodeReplacer r = data.lookupReplacer(ch);
329            if (r == null) {
330                toUnionTo.add(ch);
331            } else {
332                r.addReplacementSetTo(toUnionTo);
333            }
334        }
335    }
336}
337
338//eof
339