1/*
2**********************************************************************
3*   Copyright (c) 2002-2007, International Business Machines Corporation
4*   and others.  All Rights Reserved.
5**********************************************************************
6*   Date        Name        Description
7*   01/14/2002  aliu        Creation.
8**********************************************************************
9*/
10
11package com.ibm.icu.text;
12import com.ibm.icu.impl.Utility;
13
14/**
15 * A replacer that produces static text as its output.  The text may
16 * contain transliterator stand-in characters that represent nested
17 * UnicodeReplacer objects, making it possible to encode a tree of
18 * replacers in a StringReplacer.  A StringReplacer that contains such
19 * stand-ins is called a <em>complex</em> StringReplacer.  A complex
20 * StringReplacer has a slower processing loop than a non-complex one.
21 * @author Alan Liu
22 */
23class StringReplacer implements UnicodeReplacer {
24
25    /**
26     * Output text, possibly containing stand-in characters that
27     * represent nested UnicodeReplacers.
28     */
29    private String output;
30
31    /**
32     * Cursor position.  Value is ignored if hasCursor is false.
33     */
34    private int cursorPos;
35
36    /**
37     * True if this object outputs a cursor position.
38     */
39    private boolean hasCursor;
40
41    /**
42     * A complex object contains nested replacers and requires more
43     * complex processing.  StringReplacers are initially assumed to
44     * be complex.  If no nested replacers are seen during processing,
45     * then isComplex is set to false, and future replacements are
46     * short circuited for better performance.
47     */
48    private boolean isComplex;
49
50    /**
51     * Object that translates stand-in characters in 'output' to
52     * UnicodeReplacer objects.
53     */
54    private final RuleBasedTransliterator.Data data;
55
56    /**
57     * Construct a StringReplacer that sets the emits the given output
58     * text and sets the cursor to the given position.
59     * @param theOutput text that will replace input text when the
60     * replace() method is called.  May contain stand-in characters
61     * that represent nested replacers.
62     * @param theCursorPos cursor position that will be returned by
63     * the replace() method
64     * @param theData transliterator context object that translates
65     * stand-in characters to UnicodeReplacer objects
66     */
67    public StringReplacer(String theOutput,
68                          int theCursorPos,
69                          RuleBasedTransliterator.Data theData) {
70        output = theOutput;
71        cursorPos = theCursorPos;
72        hasCursor = true;
73        data = theData;
74        isComplex = true;
75    }
76
77    /**
78     * Construct a StringReplacer that sets the emits the given output
79     * text and does not modify the cursor.
80     * @param theOutput text that will replace input text when the
81     * replace() method is called.  May contain stand-in characters
82     * that represent nested replacers.
83     * @param theData transliterator context object that translates
84     * stand-in characters to UnicodeReplacer objects
85     */
86    public StringReplacer(String theOutput,
87                          RuleBasedTransliterator.Data theData) {
88        output = theOutput;
89        cursorPos = 0;
90        hasCursor = false;
91        data = theData;
92        isComplex = true;
93    }
94
95//=    public static UnicodeReplacer valueOf(String output,
96//=                                          int cursorPos,
97//=                                          RuleBasedTransliterator.Data data) {
98//=        if (output.length() == 1) {
99//=            char c = output.charAt(0);
100//=            UnicodeReplacer r = data.lookupReplacer(c);
101//=            if (r != null) {
102//=                return r;
103//=            }
104//=        }
105//=        return new StringReplacer(output, cursorPos, data);
106//=    }
107
108    /**
109     * UnicodeReplacer API
110     */
111    public int replace(Replaceable text,
112                       int start,
113                       int limit,
114                       int[] cursor) {
115        int outLen;
116        int newStart = 0;
117
118        // NOTE: It should be possible to _always_ run the complex
119        // processing code; just slower.  If not, then there is a bug
120        // in the complex processing code.
121
122        // Simple (no nested replacers) Processing Code :
123        if (!isComplex) {
124            text.replace(start, limit, output);
125            outLen = output.length();
126
127            // Setup default cursor position (for cursorPos within output)
128            newStart = cursorPos;
129        }
130
131        // Complex (nested replacers) Processing Code :
132        else {
133            /* When there are segments to be copied, use the Replaceable.copy()
134             * API in order to retain out-of-band data.  Copy everything to the
135             * end of the string, then copy them back over the key.  This preserves
136             * the integrity of indices into the key and surrounding context while
137             * generating the output text.
138             */
139            StringBuffer buf = new StringBuffer();
140            int oOutput; // offset into 'output'
141            isComplex = false;
142
143            // The temporary buffer starts at tempStart, and extends
144            // to destLimit + tempExtra.  The start of the buffer has a single
145            // character from before the key.  This provides style
146            // data when addition characters are filled into the
147            // temporary buffer.  If there is nothing to the left, use
148            // the non-character U+FFFF, which Replaceable subclasses
149            // should treat specially as a "no-style character."
150            // destStart points to the point after the style context
151            // character, so it is tempStart+1 or tempStart+2.
152            int tempStart = text.length(); // start of temp buffer
153            int destStart = tempStart; // copy new text to here
154            if (start > 0) {
155                int len = UTF16.getCharCount(text.char32At(start-1));
156                text.copy(start-len, start, tempStart);
157                destStart += len;
158            } else {
159                text.replace(tempStart, tempStart, "\uFFFF");
160                destStart++;
161            }
162            int destLimit = destStart;
163            int tempExtra = 0; // temp chars after destLimit
164
165            for (oOutput=0; oOutput<output.length(); ) {
166                if (oOutput == cursorPos) {
167                    // Record the position of the cursor
168                    newStart = buf.length() + destLimit - destStart; // relative to start
169                    // the buf.length() was inserted for bug 5789
170                    // the problem is that if we are accumulating into a buffer (when r == null below)
171                    // then the actual length of the text at that point needs to add the buf length.
172                    // there was an alternative suggested in #5789, but that looks like it won't work
173                    // if we have accumulated some stuff in the dest part AND have a non-zero buffer.
174                }
175                int c = UTF16.charAt(output, oOutput);
176
177                // When we are at the last position copy the right style
178                // context character into the temporary buffer.  We don't
179                // do this before because it will provide an incorrect
180                // right context for previous replace() operations.
181                int nextIndex = oOutput + UTF16.getCharCount(c);
182                if (nextIndex == output.length()) {
183                    tempExtra = UTF16.getCharCount(text.char32At(limit));
184                    text.copy(limit, limit+tempExtra, destLimit);
185                }
186
187                UnicodeReplacer r = data.lookupReplacer(c);
188                if (r == null) {
189                    // Accumulate straight (non-segment) text.
190                    UTF16.append(buf, c);
191                } else {
192                    isComplex = true;
193
194                    // Insert any accumulated straight text.
195                    if (buf.length() > 0) {
196                        text.replace(destLimit, destLimit, buf.toString());
197                        destLimit += buf.length();
198                        buf.setLength(0);
199                    }
200
201                    // Delegate output generation to replacer object
202                    int len = r.replace(text, destLimit, destLimit, cursor);
203                    destLimit += len;
204                }
205                oOutput = nextIndex;
206            }
207            // Insert any accumulated straight text.
208            if (buf.length() > 0) {
209                text.replace(destLimit, destLimit, buf.toString());
210                destLimit += buf.length();
211            }
212            if (oOutput == cursorPos) {
213                // Record the position of the cursor
214                newStart = destLimit - destStart; // relative to start
215            }
216
217            outLen = destLimit - destStart;
218
219            // Copy new text to start, and delete it
220            text.copy(destStart, destLimit, start);
221            text.replace(tempStart + outLen, destLimit + tempExtra + outLen, "");
222
223            // Delete the old text (the key)
224            text.replace(start + outLen, limit + outLen, "");
225        }
226
227        if (hasCursor) {
228            // Adjust the cursor for positions outside the key.  These
229            // refer to code points rather than code units.  If cursorPos
230            // is within the output string, then use newStart, which has
231            // already been set above.
232            if (cursorPos < 0) {
233                newStart = start;
234                int n = cursorPos;
235                // Outside the output string, cursorPos counts code points
236                while (n < 0 && newStart > 0) {
237                    newStart -= UTF16.getCharCount(text.char32At(newStart-1));
238                    ++n;
239                }
240                newStart += n;
241            } else if (cursorPos > output.length()) {
242                newStart = start + outLen;
243                int n = cursorPos - output.length();
244                // Outside the output string, cursorPos counts code points
245                while (n > 0 && newStart < text.length()) {
246                    newStart += UTF16.getCharCount(text.char32At(newStart));
247                    --n;
248                }
249                newStart += n;
250            } else {
251                // Cursor is within output string.  It has been set up above
252                // to be relative to start.
253                newStart += start;
254            }
255
256            cursor[0] = newStart;
257        }
258
259        return outLen;
260    }
261
262    /**
263     * UnicodeReplacer API
264     */
265    public String toReplacerPattern(boolean escapeUnprintable) {
266        StringBuffer rule = new StringBuffer();
267        StringBuffer quoteBuf = new StringBuffer();
268
269        int cursor = cursorPos;
270
271        // Handle a cursor preceding the output
272        if (hasCursor && cursor < 0) {
273            while (cursor++ < 0) {
274                Utility.appendToRule(rule, '@', true, escapeUnprintable, quoteBuf);
275            }
276            // Fall through and append '|' below
277        }
278
279        for (int i=0; i<output.length(); ++i) {
280            if (hasCursor && i == cursor) {
281                Utility.appendToRule(rule, '|', true, escapeUnprintable, quoteBuf);
282            }
283            char c = output.charAt(i); // Ok to use 16-bits here
284
285            UnicodeReplacer r = data.lookupReplacer(c);
286            if (r == null) {
287                Utility.appendToRule(rule, c, false, escapeUnprintable, quoteBuf);
288            } else {
289                StringBuffer buf = new StringBuffer(" ");
290                buf.append(r.toReplacerPattern(escapeUnprintable));
291                buf.append(' ');
292                Utility.appendToRule(rule, buf.toString(),
293                                     true, escapeUnprintable, quoteBuf);
294            }
295        }
296
297        // Handle a cursor after the output.  Use > rather than >= because
298        // if cursor == output.length() it is at the end of the output,
299        // which is the default position, so we need not emit it.
300        if (hasCursor && cursor > output.length()) {
301            cursor -= output.length();
302            while (cursor-- > 0) {
303                Utility.appendToRule(rule, '@', true, escapeUnprintable, quoteBuf);
304            }
305            Utility.appendToRule(rule, '|', true, escapeUnprintable, quoteBuf);
306        }
307        // Flush quoteBuf out to result
308        Utility.appendToRule(rule, -1,
309                             true, escapeUnprintable, quoteBuf);
310
311        return rule.toString();
312    }
313
314    /**
315     * Union the set of all characters that may output by this object
316     * into the given set.
317     * @param toUnionTo the set into which to union the output characters
318     */
319    public void addReplacementSetTo(UnicodeSet toUnionTo) {
320        int ch;
321        for (int i=0; i<output.length(); i+=UTF16.getCharCount(ch)) {
322            ch = UTF16.charAt(output, i);
323            UnicodeReplacer r = data.lookupReplacer(ch);
324            if (r == null) {
325                toUnionTo.add(ch);
326            } else {
327                r.addReplacementSetTo(toUnionTo);
328            }
329        }
330    }
331}
332
333//eof
334