/*
**********************************************************************
*   Copyright (c) 2002-2007, International Business Machines Corporation
*   and others.  All Rights Reserved.
**********************************************************************
*   Date        Name        Description
*   01/14/2002  aliu        Creation.
**********************************************************************
*/
107935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert
117935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubertpackage com.ibm.icu.text;
127935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubertimport com.ibm.icu.impl.Utility;
137935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert
147935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert/**
157935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert * A replacer that produces static text as its output.  The text may
167935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert * contain transliterator stand-in characters that represent nested
177935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert * UnicodeReplacer objects, making it possible to encode a tree of
187935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert * replacers in a StringReplacer.  A StringReplacer that contains such
197935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert * stand-ins is called a <em>complex</em> StringReplacer.  A complex
207935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert * StringReplacer has a slower processing loop than a non-complex one.
217935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert * @author Alan Liu
227935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert */
237935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubertclass StringReplacer implements UnicodeReplacer {
247935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert
257935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert    /**
267935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert     * Output text, possibly containing stand-in characters that
277935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert     * represent nested UnicodeReplacers.
287935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert     */
297935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert    private String output;
307935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert
317935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert    /**
327935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert     * Cursor position.  Value is ignored if hasCursor is false.
337935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert     */
347935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert    private int cursorPos;
357935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert
367935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert    /**
377935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert     * True if this object outputs a cursor position.
387935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert     */
397935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert    private boolean hasCursor;
407935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert
417935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert    /**
427935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert     * A complex object contains nested replacers and requires more
437935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert     * complex processing.  StringReplacers are initially assumed to
447935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert     * be complex.  If no nested replacers are seen during processing,
457935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert     * then isComplex is set to false, and future replacements are
467935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert     * short circuited for better performance.
477935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert     */
487935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert    private boolean isComplex;
497935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert
507935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert    /**
517935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert     * Object that translates stand-in characters in 'output' to
527935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert     * UnicodeReplacer objects.
537935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert     */
547935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert    private final RuleBasedTransliterator.Data data;
557935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert
567935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert    /**
577935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert     * Construct a StringReplacer that sets the emits the given output
587935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert     * text and sets the cursor to the given position.
597935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert     * @param theOutput text that will replace input text when the
607935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert     * replace() method is called.  May contain stand-in characters
617935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert     * that represent nested replacers.
627935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert     * @param theCursorPos cursor position that will be returned by
637935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert     * the replace() method
647935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert     * @param theData transliterator context object that translates
657935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert     * stand-in characters to UnicodeReplacer objects
667935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert     */
677935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert    public StringReplacer(String theOutput,
687935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                          int theCursorPos,
697935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                          RuleBasedTransliterator.Data theData) {
707935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        output = theOutput;
717935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        cursorPos = theCursorPos;
727935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        hasCursor = true;
737935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        data = theData;
747935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        isComplex = true;
757935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert    }
767935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert
777935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert    /**
787935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert     * Construct a StringReplacer that sets the emits the given output
797935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert     * text and does not modify the cursor.
807935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert     * @param theOutput text that will replace input text when the
817935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert     * replace() method is called.  May contain stand-in characters
827935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert     * that represent nested replacers.
837935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert     * @param theData transliterator context object that translates
847935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert     * stand-in characters to UnicodeReplacer objects
857935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert     */
867935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert    public StringReplacer(String theOutput,
877935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                          RuleBasedTransliterator.Data theData) {
887935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        output = theOutput;
897935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        cursorPos = 0;
907935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        hasCursor = false;
917935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        data = theData;
927935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        isComplex = true;
937935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert    }
947935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert
957935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert//=    public static UnicodeReplacer valueOf(String output,
967935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert//=                                          int cursorPos,
977935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert//=                                          RuleBasedTransliterator.Data data) {
987935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert//=        if (output.length() == 1) {
997935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert//=            char c = output.charAt(0);
1007935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert//=            UnicodeReplacer r = data.lookupReplacer(c);
1017935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert//=            if (r != null) {
1027935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert//=                return r;
1037935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert//=            }
1047935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert//=        }
1057935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert//=        return new StringReplacer(output, cursorPos, data);
1067935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert//=    }
1077935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert
1087935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert    /**
1097935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert     * UnicodeReplacer API
1107935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert     */
1117935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert    public int replace(Replaceable text,
1127935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                       int start,
1137935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                       int limit,
1147935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                       int[] cursor) {
1157935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        int outLen;
1167935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        int newStart = 0;
1177935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert
1187935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        // NOTE: It should be possible to _always_ run the complex
1197935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        // processing code; just slower.  If not, then there is a bug
1207935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        // in the complex processing code.
1217935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert
1227935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        // Simple (no nested replacers) Processing Code :
1237935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        if (!isComplex) {
1247935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert            text.replace(start, limit, output);
1257935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert            outLen = output.length();
1267935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert
1277935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert            // Setup default cursor position (for cursorPos within output)
1287935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert            newStart = cursorPos;
1297935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        }
1307935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert
1317935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        // Complex (nested replacers) Processing Code :
1327935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        else {
1337935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert            /* When there are segments to be copied, use the Replaceable.copy()
1347935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert             * API in order to retain out-of-band data.  Copy everything to the
1357935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert             * end of the string, then copy them back over the key.  This preserves
1367935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert             * the integrity of indices into the key and surrounding context while
1377935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert             * generating the output text.
1387935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert             */
1397935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert            StringBuffer buf = new StringBuffer();
1407935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert            int oOutput; // offset into 'output'
1417935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert            isComplex = false;
1427935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert
1437935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert            // The temporary buffer starts at tempStart, and extends
1447935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert            // to destLimit + tempExtra.  The start of the buffer has a single
1457935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert            // character from before the key.  This provides style
1467935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert            // data when addition characters are filled into the
1477935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert            // temporary buffer.  If there is nothing to the left, use
1487935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert            // the non-character U+FFFF, which Replaceable subclasses
1497935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert            // should treat specially as a "no-style character."
1507935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert            // destStart points to the point after the style context
1517935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert            // character, so it is tempStart+1 or tempStart+2.
1527935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert            int tempStart = text.length(); // start of temp buffer
1537935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert            int destStart = tempStart; // copy new text to here
1547935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert            if (start > 0) {
1557935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                int len = UTF16.getCharCount(text.char32At(start-1));
1567935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                text.copy(start-len, start, tempStart);
1577935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                destStart += len;
1587935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert            } else {
1597935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                text.replace(tempStart, tempStart, "\uFFFF");
1607935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                destStart++;
1617935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert            }
1627935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert            int destLimit = destStart;
1637935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert            int tempExtra = 0; // temp chars after destLimit
1647935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert
1657935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert            for (oOutput=0; oOutput<output.length(); ) {
1667935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                if (oOutput == cursorPos) {
1677935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                    // Record the position of the cursor
1687935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                    newStart = buf.length() + destLimit - destStart; // relative to start
1697935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                    // the buf.length() was inserted for bug 5789
1707935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                    // the problem is that if we are accumulating into a buffer (when r == null below)
1717935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                    // then the actual length of the text at that point needs to add the buf length.
1727935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                    // there was an alternative suggested in #5789, but that looks like it won't work
1737935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                    // if we have accumulated some stuff in the dest part AND have a non-zero buffer.
1747935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                }
1757935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                int c = UTF16.charAt(output, oOutput);
1767935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert
1777935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                // When we are at the last position copy the right style
1787935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                // context character into the temporary buffer.  We don't
1797935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                // do this before because it will provide an incorrect
1807935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                // right context for previous replace() operations.
1817935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                int nextIndex = oOutput + UTF16.getCharCount(c);
1827935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                if (nextIndex == output.length()) {
1837935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                    tempExtra = UTF16.getCharCount(text.char32At(limit));
1847935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                    text.copy(limit, limit+tempExtra, destLimit);
1857935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                }
1867935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert
1877935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                UnicodeReplacer r = data.lookupReplacer(c);
1887935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                if (r == null) {
1897935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                    // Accumulate straight (non-segment) text.
1907935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                    UTF16.append(buf, c);
1917935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                } else {
1927935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                    isComplex = true;
1937935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert
1947935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                    // Insert any accumulated straight text.
1957935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                    if (buf.length() > 0) {
1967935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                        text.replace(destLimit, destLimit, buf.toString());
1977935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                        destLimit += buf.length();
1987935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                        buf.setLength(0);
1997935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                    }
2007935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert
2017935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                    // Delegate output generation to replacer object
2027935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                    int len = r.replace(text, destLimit, destLimit, cursor);
2037935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                    destLimit += len;
2047935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                }
2057935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                oOutput = nextIndex;
2067935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert            }
2077935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert            // Insert any accumulated straight text.
2087935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert            if (buf.length() > 0) {
2097935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                text.replace(destLimit, destLimit, buf.toString());
2107935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                destLimit += buf.length();
2117935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert            }
2127935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert            if (oOutput == cursorPos) {
2137935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                // Record the position of the cursor
2147935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                newStart = destLimit - destStart; // relative to start
2157935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert            }
2167935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert
2177935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert            outLen = destLimit - destStart;
2187935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert
2197935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert            // Copy new text to start, and delete it
2207935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert            text.copy(destStart, destLimit, start);
2217935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert            text.replace(tempStart + outLen, destLimit + tempExtra + outLen, "");
2227935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert
2237935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert            // Delete the old text (the key)
2247935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert            text.replace(start + outLen, limit + outLen, "");
2257935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        }
2267935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert
2277935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        if (hasCursor) {
2287935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert            // Adjust the cursor for positions outside the key.  These
2297935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert            // refer to code points rather than code units.  If cursorPos
2307935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert            // is within the output string, then use newStart, which has
2317935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert            // already been set above.
2327935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert            if (cursorPos < 0) {
2337935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                newStart = start;
2347935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                int n = cursorPos;
2357935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                // Outside the output string, cursorPos counts code points
2367935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                while (n < 0 && newStart > 0) {
2377935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                    newStart -= UTF16.getCharCount(text.char32At(newStart-1));
2387935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                    ++n;
2397935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                }
2407935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                newStart += n;
2417935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert            } else if (cursorPos > output.length()) {
2427935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                newStart = start + outLen;
2437935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                int n = cursorPos - output.length();
2447935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                // Outside the output string, cursorPos counts code points
2457935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                while (n > 0 && newStart < text.length()) {
2467935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                    newStart += UTF16.getCharCount(text.char32At(newStart));
2477935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                    --n;
2487935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                }
2497935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                newStart += n;
2507935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert            } else {
2517935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                // Cursor is within output string.  It has been set up above
2527935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                // to be relative to start.
2537935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                newStart += start;
2547935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert            }
2557935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert
2567935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert            cursor[0] = newStart;
2577935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        }
2587935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert
2597935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        return outLen;
2607935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert    }
2617935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert
2627935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert    /**
2637935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert     * UnicodeReplacer API
2647935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert     */
2657935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert    public String toReplacerPattern(boolean escapeUnprintable) {
2667935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        StringBuffer rule = new StringBuffer();
2677935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        StringBuffer quoteBuf = new StringBuffer();
2687935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert
2697935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        int cursor = cursorPos;
2707935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert
2717935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        // Handle a cursor preceding the output
2727935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        if (hasCursor && cursor < 0) {
2737935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert            while (cursor++ < 0) {
2747935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                Utility.appendToRule(rule, '@', true, escapeUnprintable, quoteBuf);
2757935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert            }
2767935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert            // Fall through and append '|' below
2777935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        }
2787935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert
2797935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        for (int i=0; i<output.length(); ++i) {
2807935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert            if (hasCursor && i == cursor) {
2817935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                Utility.appendToRule(rule, '|', true, escapeUnprintable, quoteBuf);
2827935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert            }
2837935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert            char c = output.charAt(i); // Ok to use 16-bits here
2847935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert
2857935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert            UnicodeReplacer r = data.lookupReplacer(c);
2867935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert            if (r == null) {
2877935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                Utility.appendToRule(rule, c, false, escapeUnprintable, quoteBuf);
2887935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert            } else {
2897935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                StringBuffer buf = new StringBuffer(" ");
2907935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                buf.append(r.toReplacerPattern(escapeUnprintable));
2917935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                buf.append(' ');
2927935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                Utility.appendToRule(rule, buf.toString(),
2937935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                                     true, escapeUnprintable, quoteBuf);
2947935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert            }
2957935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        }
2967935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert
2977935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        // Handle a cursor after the output.  Use > rather than >= because
2987935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        // if cursor == output.length() it is at the end of the output,
2997935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        // which is the default position, so we need not emit it.
3007935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        if (hasCursor && cursor > output.length()) {
3017935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert            cursor -= output.length();
3027935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert            while (cursor-- > 0) {
3037935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                Utility.appendToRule(rule, '@', true, escapeUnprintable, quoteBuf);
3047935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert            }
3057935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert            Utility.appendToRule(rule, '|', true, escapeUnprintable, quoteBuf);
3067935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        }
3077935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        // Flush quoteBuf out to result
3087935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        Utility.appendToRule(rule, -1,
3097935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                             true, escapeUnprintable, quoteBuf);
3107935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert
3117935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        return rule.toString();
3127935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert    }
3137935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert
3147935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert    /**
3157935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert     * Union the set of all characters that may output by this object
3167935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert     * into the given set.
3177935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert     * @param toUnionTo the set into which to union the output characters
3187935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert     */
3197935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert    public void addReplacementSetTo(UnicodeSet toUnionTo) {
3207935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        int ch;
3217935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        for (int i=0; i<output.length(); i+=UTF16.getCharCount(ch)) {
3227935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert            ch = UTF16.charAt(output, i);
3237935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert            UnicodeReplacer r = data.lookupReplacer(ch);
3247935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert            if (r == null) {
3257935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                toUnionTo.add(ch);
3267935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert            } else {
3277935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                r.addReplacementSetTo(toUnionTo);
3287935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert            }
3297935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        }
3307935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert    }
3317935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert}
3327935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert
//eof
334