17935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert/* 27935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert********************************************************************** 37935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert* Copyright (c) 2002-2007, International Business Machines Corporation 47935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert* and others. All Rights Reserved. 57935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert********************************************************************** 67935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert* Date Name Description 77935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert* 01/14/2002 aliu Creation. 87935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert********************************************************************** 97935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert*/ 107935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert 117935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubertpackage com.ibm.icu.text; 127935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubertimport com.ibm.icu.impl.Utility; 137935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert 147935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert/** 157935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert * A replacer that produces static text as its output. The text may 167935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert * contain transliterator stand-in characters that represent nested 177935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert * UnicodeReplacer objects, making it possible to encode a tree of 187935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert * replacers in a StringReplacer. A StringReplacer that contains such 197935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert * stand-ins is called a <em>complex</em> StringReplacer. A complex 207935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert * StringReplacer has a slower processing loop than a non-complex one. 217935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert * @author Alan Liu 227935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert */ 237935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubertclass StringReplacer implements UnicodeReplacer { 247935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert 257935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert /** 267935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert * Output text, possibly containing stand-in characters that 277935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert * represent nested UnicodeReplacers. 287935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert */ 297935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert private String output; 307935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert 317935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert /** 327935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert * Cursor position. Value is ignored if hasCursor is false. 337935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert */ 347935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert private int cursorPos; 357935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert 367935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert /** 377935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert * True if this object outputs a cursor position. 387935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert */ 397935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert private boolean hasCursor; 407935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert 417935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert /** 427935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert * A complex object contains nested replacers and requires more 437935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert * complex processing. StringReplacers are initially assumed to 447935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert * be complex. If no nested replacers are seen during processing, 457935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert * then isComplex is set to false, and future replacements are 467935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert * short circuited for better performance. 477935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert */ 487935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert private boolean isComplex; 497935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert 507935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert /** 517935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert * Object that translates stand-in characters in 'output' to 527935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert * UnicodeReplacer objects. 537935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert */ 547935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert private final RuleBasedTransliterator.Data data; 557935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert 567935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert /** 577935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert * Construct a StringReplacer that sets the emits the given output 587935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert * text and sets the cursor to the given position. 597935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert * @param theOutput text that will replace input text when the 607935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert * replace() method is called. May contain stand-in characters 617935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert * that represent nested replacers. 627935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert * @param theCursorPos cursor position that will be returned by 637935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert * the replace() method 647935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert * @param theData transliterator context object that translates 657935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert * stand-in characters to UnicodeReplacer objects 667935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert */ 677935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert public StringReplacer(String theOutput, 687935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert int theCursorPos, 697935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert RuleBasedTransliterator.Data theData) { 707935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert output = theOutput; 717935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert cursorPos = theCursorPos; 727935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert hasCursor = true; 737935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert data = theData; 747935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert isComplex = true; 757935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert } 767935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert 777935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert /** 787935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert * Construct a StringReplacer that sets the emits the given output 797935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert * text and does not modify the cursor. 807935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert * @param theOutput text that will replace input text when the 817935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert * replace() method is called. May contain stand-in characters 827935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert * that represent nested replacers. 837935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert * @param theData transliterator context object that translates 847935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert * stand-in characters to UnicodeReplacer objects 857935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert */ 867935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert public StringReplacer(String theOutput, 877935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert RuleBasedTransliterator.Data theData) { 887935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert output = theOutput; 897935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert cursorPos = 0; 907935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert hasCursor = false; 917935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert data = theData; 927935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert isComplex = true; 937935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert } 947935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert 957935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert//= public static UnicodeReplacer valueOf(String output, 967935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert//= int cursorPos, 977935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert//= RuleBasedTransliterator.Data data) { 987935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert//= if (output.length() == 1) { 997935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert//= char c = output.charAt(0); 1007935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert//= UnicodeReplacer r = data.lookupReplacer(c); 1017935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert//= if (r != null) { 1027935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert//= return r; 1037935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert//= } 1047935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert//= } 1057935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert//= return new StringReplacer(output, cursorPos, data); 1067935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert//= } 1077935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert 1087935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert /** 1097935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert * UnicodeReplacer API 1107935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert */ 1117935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert public int replace(Replaceable text, 1127935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert int start, 1137935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert int limit, 1147935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert int[] cursor) { 1157935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert int outLen; 1167935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert int newStart = 0; 1177935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert 1187935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert // NOTE: It should be possible to _always_ run the complex 1197935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert // processing code; just slower. If not, then there is a bug 1207935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert // in the complex processing code. 1217935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert 1227935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert // Simple (no nested replacers) Processing Code : 1237935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert if (!isComplex) { 1247935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert text.replace(start, limit, output); 1257935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert outLen = output.length(); 1267935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert 1277935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert // Setup default cursor position (for cursorPos within output) 1287935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert newStart = cursorPos; 1297935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert } 1307935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert 1317935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert // Complex (nested replacers) Processing Code : 1327935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert else { 1337935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert /* When there are segments to be copied, use the Replaceable.copy() 1347935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert * API in order to retain out-of-band data. Copy everything to the 1357935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert * end of the string, then copy them back over the key. This preserves 1367935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert * the integrity of indices into the key and surrounding context while 1377935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert * generating the output text. 1387935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert */ 1397935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert StringBuffer buf = new StringBuffer(); 1407935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert int oOutput; // offset into 'output' 1417935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert isComplex = false; 1427935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert 1437935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert // The temporary buffer starts at tempStart, and extends 1447935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert // to destLimit + tempExtra. The start of the buffer has a single 1457935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert // character from before the key. This provides style 1467935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert // data when addition characters are filled into the 1477935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert // temporary buffer. If there is nothing to the left, use 1487935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert // the non-character U+FFFF, which Replaceable subclasses 1497935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert // should treat specially as a "no-style character." 1507935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert // destStart points to the point after the style context 1517935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert // character, so it is tempStart+1 or tempStart+2. 1527935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert int tempStart = text.length(); // start of temp buffer 1537935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert int destStart = tempStart; // copy new text to here 1547935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert if (start > 0) { 1557935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert int len = UTF16.getCharCount(text.char32At(start-1)); 1567935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert text.copy(start-len, start, tempStart); 1577935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert destStart += len; 1587935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert } else { 1597935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert text.replace(tempStart, tempStart, "\uFFFF"); 1607935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert destStart++; 1617935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert } 1627935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert int destLimit = destStart; 1637935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert int tempExtra = 0; // temp chars after destLimit 1647935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert 1657935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert for (oOutput=0; oOutput<output.length(); ) { 1667935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert if (oOutput == cursorPos) { 1677935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert // Record the position of the cursor 1687935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert newStart = buf.length() + destLimit - destStart; // relative to start 1697935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert // the buf.length() was inserted for bug 5789 1707935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert // the problem is that if we are accumulating into a buffer (when r == null below) 1717935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert // then the actual length of the text at that point needs to add the buf length. 1727935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert // there was an alternative suggested in #5789, but that looks like it won't work 1737935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert // if we have accumulated some stuff in the dest part AND have a non-zero buffer. 1747935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert } 1757935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert int c = UTF16.charAt(output, oOutput); 1767935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert 1777935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert // When we are at the last position copy the right style 1787935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert // context character into the temporary buffer. We don't 1797935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert // do this before because it will provide an incorrect 1807935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert // right context for previous replace() operations. 1817935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert int nextIndex = oOutput + UTF16.getCharCount(c); 1827935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert if (nextIndex == output.length()) { 1837935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert tempExtra = UTF16.getCharCount(text.char32At(limit)); 1847935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert text.copy(limit, limit+tempExtra, destLimit); 1857935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert } 1867935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert 1877935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert UnicodeReplacer r = data.lookupReplacer(c); 1887935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert if (r == null) { 1897935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert // Accumulate straight (non-segment) text. 1907935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert UTF16.append(buf, c); 1917935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert } else { 1927935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert isComplex = true; 1937935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert 1947935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert // Insert any accumulated straight text. 1957935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert if (buf.length() > 0) { 1967935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert text.replace(destLimit, destLimit, buf.toString()); 1977935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert destLimit += buf.length(); 1987935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert buf.setLength(0); 1997935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert } 2007935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert 2017935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert // Delegate output generation to replacer object 2027935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert int len = r.replace(text, destLimit, destLimit, cursor); 2037935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert destLimit += len; 2047935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert } 2057935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert oOutput = nextIndex; 2067935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert } 2077935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert // Insert any accumulated straight text. 2087935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert if (buf.length() > 0) { 2097935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert text.replace(destLimit, destLimit, buf.toString()); 2107935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert destLimit += buf.length(); 2117935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert } 2127935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert if (oOutput == cursorPos) { 2137935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert // Record the position of the cursor 2147935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert newStart = destLimit - destStart; // relative to start 2157935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert } 2167935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert 2177935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert outLen = destLimit - destStart; 2187935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert 2197935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert // Copy new text to start, and delete it 2207935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert text.copy(destStart, destLimit, start); 2217935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert text.replace(tempStart + outLen, destLimit + tempExtra + outLen, ""); 2227935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert 2237935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert // Delete the old text (the key) 2247935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert text.replace(start + outLen, limit + outLen, ""); 2257935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert } 2267935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert 2277935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert if (hasCursor) { 2287935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert // Adjust the cursor for positions outside the key. These 2297935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert // refer to code points rather than code units. If cursorPos 2307935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert // is within the output string, then use newStart, which has 2317935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert // already been set above. 2327935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert if (cursorPos < 0) { 2337935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert newStart = start; 2347935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert int n = cursorPos; 2357935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert // Outside the output string, cursorPos counts code points 2367935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert while (n < 0 && newStart > 0) { 2377935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert newStart -= UTF16.getCharCount(text.char32At(newStart-1)); 2387935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert ++n; 2397935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert } 2407935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert newStart += n; 2417935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert } else if (cursorPos > output.length()) { 2427935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert newStart = start + outLen; 2437935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert int n = cursorPos - output.length(); 2447935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert // Outside the output string, cursorPos counts code points 2457935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert while (n > 0 && newStart < text.length()) { 2467935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert newStart += UTF16.getCharCount(text.char32At(newStart)); 2477935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert --n; 2487935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert } 2497935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert newStart += n; 2507935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert } else { 2517935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert // Cursor is within output string. It has been set up above 2527935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert // to be relative to start. 2537935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert newStart += start; 2547935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert } 2557935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert 2567935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert cursor[0] = newStart; 2577935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert } 2587935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert 2597935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert return outLen; 2607935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert } 2617935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert 2627935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert /** 2637935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert * UnicodeReplacer API 2647935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert */ 2657935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert public String toReplacerPattern(boolean escapeUnprintable) { 2667935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert StringBuffer rule = new StringBuffer(); 2677935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert StringBuffer quoteBuf = new StringBuffer(); 2687935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert 2697935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert int cursor = cursorPos; 2707935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert 2717935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert // Handle a cursor preceding the output 2727935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert if (hasCursor && cursor < 0) { 2737935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert while (cursor++ < 0) { 2747935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert Utility.appendToRule(rule, '@', true, escapeUnprintable, quoteBuf); 2757935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert } 2767935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert // Fall through and append '|' below 2777935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert } 2787935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert 2797935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert for (int i=0; i<output.length(); ++i) { 2807935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert if (hasCursor && i == cursor) { 2817935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert Utility.appendToRule(rule, '|', true, escapeUnprintable, quoteBuf); 2827935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert } 2837935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert char c = output.charAt(i); // Ok to use 16-bits here 2847935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert 2857935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert UnicodeReplacer r = data.lookupReplacer(c); 2867935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert if (r == null) { 2877935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert Utility.appendToRule(rule, c, false, escapeUnprintable, quoteBuf); 2887935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert } else { 2897935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert StringBuffer buf = new StringBuffer(" "); 2907935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert buf.append(r.toReplacerPattern(escapeUnprintable)); 2917935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert buf.append(' '); 2927935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert Utility.appendToRule(rule, buf.toString(), 2937935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert true, escapeUnprintable, quoteBuf); 2947935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert } 2957935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert } 2967935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert 2977935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert // Handle a cursor after the output. Use > rather than >= because 2987935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert // if cursor == output.length() it is at the end of the output, 2997935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert // which is the default position, so we need not emit it. 3007935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert if (hasCursor && cursor > output.length()) { 3017935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert cursor -= output.length(); 3027935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert while (cursor-- > 0) { 3037935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert Utility.appendToRule(rule, '@', true, escapeUnprintable, quoteBuf); 3047935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert } 3057935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert Utility.appendToRule(rule, '|', true, escapeUnprintable, quoteBuf); 3067935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert } 3077935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert // Flush quoteBuf out to result 3087935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert Utility.appendToRule(rule, -1, 3097935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert true, escapeUnprintable, quoteBuf); 3107935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert 3117935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert return rule.toString(); 3127935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert } 3137935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert 3147935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert /** 3157935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert * Union the set of all characters that may output by this object 3167935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert * into the given set. 3177935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert * @param toUnionTo the set into which to union the output characters 3187935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert */ 3197935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert public void addReplacementSetTo(UnicodeSet toUnionTo) { 3207935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert int ch; 3217935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert for (int i=0; i<output.length(); i+=UTF16.getCharCount(ch)) { 3227935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert ch = UTF16.charAt(output, i); 3237935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert UnicodeReplacer r = data.lookupReplacer(ch); 3247935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert if (r == null) { 3257935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert toUnionTo.add(ch); 3267935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert } else { 3277935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert r.addReplacementSetTo(toUnionTo); 3287935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert } 3297935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert } 3307935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert } 3317935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert} 3327935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert 3337935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert//eof 334