1// © 2016 and later: Unicode, Inc. and others. 2// License & terms of use: http://www.unicode.org/copyright.html#License 3/* 4********************************************************************** 5* Copyright (c) 2002-2007, International Business Machines Corporation 6* and others. All Rights Reserved. 7********************************************************************** 8* Date Name Description 9* 01/14/2002 aliu Creation. 10********************************************************************** 11*/ 12 13package com.ibm.icu.text; 14import com.ibm.icu.impl.Utility; 15 16/** 17 * A replacer that produces static text as its output. The text may 18 * contain transliterator stand-in characters that represent nested 19 * UnicodeReplacer objects, making it possible to encode a tree of 20 * replacers in a StringReplacer. A StringReplacer that contains such 21 * stand-ins is called a <em>complex</em> StringReplacer. A complex 22 * StringReplacer has a slower processing loop than a non-complex one. 23 * @author Alan Liu 24 */ 25class StringReplacer implements UnicodeReplacer { 26 27 /** 28 * Output text, possibly containing stand-in characters that 29 * represent nested UnicodeReplacers. 30 */ 31 private String output; 32 33 /** 34 * Cursor position. Value is ignored if hasCursor is false. 35 */ 36 private int cursorPos; 37 38 /** 39 * True if this object outputs a cursor position. 40 */ 41 private boolean hasCursor; 42 43 /** 44 * A complex object contains nested replacers and requires more 45 * complex processing. StringReplacers are initially assumed to 46 * be complex. If no nested replacers are seen during processing, 47 * then isComplex is set to false, and future replacements are 48 * short circuited for better performance. 49 */ 50 private boolean isComplex; 51 52 /** 53 * Object that translates stand-in characters in 'output' to 54 * UnicodeReplacer objects. 55 */ 56 private final RuleBasedTransliterator.Data data; 57 58 /** 59 * Construct a StringReplacer that sets the emits the given output 60 * text and sets the cursor to the given position. 61 * @param theOutput text that will replace input text when the 62 * replace() method is called. May contain stand-in characters 63 * that represent nested replacers. 64 * @param theCursorPos cursor position that will be returned by 65 * the replace() method 66 * @param theData transliterator context object that translates 67 * stand-in characters to UnicodeReplacer objects 68 */ 69 public StringReplacer(String theOutput, 70 int theCursorPos, 71 RuleBasedTransliterator.Data theData) { 72 output = theOutput; 73 cursorPos = theCursorPos; 74 hasCursor = true; 75 data = theData; 76 isComplex = true; 77 } 78 79 /** 80 * Construct a StringReplacer that sets the emits the given output 81 * text and does not modify the cursor. 82 * @param theOutput text that will replace input text when the 83 * replace() method is called. May contain stand-in characters 84 * that represent nested replacers. 85 * @param theData transliterator context object that translates 86 * stand-in characters to UnicodeReplacer objects 87 */ 88 public StringReplacer(String theOutput, 89 RuleBasedTransliterator.Data theData) { 90 output = theOutput; 91 cursorPos = 0; 92 hasCursor = false; 93 data = theData; 94 isComplex = true; 95 } 96 97//= public static UnicodeReplacer valueOf(String output, 98//= int cursorPos, 99//= RuleBasedTransliterator.Data data) { 100//= if (output.length() == 1) { 101//= char c = output.charAt(0); 102//= UnicodeReplacer r = data.lookupReplacer(c); 103//= if (r != null) { 104//= return r; 105//= } 106//= } 107//= return new StringReplacer(output, cursorPos, data); 108//= } 109 110 /** 111 * UnicodeReplacer API 112 */ 113 @Override 114 public int replace(Replaceable text, 115 int start, 116 int limit, 117 int[] cursor) { 118 int outLen; 119 int newStart = 0; 120 121 // NOTE: It should be possible to _always_ run the complex 122 // processing code; just slower. If not, then there is a bug 123 // in the complex processing code. 124 125 // Simple (no nested replacers) Processing Code : 126 if (!isComplex) { 127 text.replace(start, limit, output); 128 outLen = output.length(); 129 130 // Setup default cursor position (for cursorPos within output) 131 newStart = cursorPos; 132 } 133 134 // Complex (nested replacers) Processing Code : 135 else { 136 /* When there are segments to be copied, use the Replaceable.copy() 137 * API in order to retain out-of-band data. Copy everything to the 138 * end of the string, then copy them back over the key. This preserves 139 * the integrity of indices into the key and surrounding context while 140 * generating the output text. 141 */ 142 StringBuffer buf = new StringBuffer(); 143 int oOutput; // offset into 'output' 144 isComplex = false; 145 146 // The temporary buffer starts at tempStart, and extends 147 // to destLimit + tempExtra. The start of the buffer has a single 148 // character from before the key. This provides style 149 // data when addition characters are filled into the 150 // temporary buffer. If there is nothing to the left, use 151 // the non-character U+FFFF, which Replaceable subclasses 152 // should treat specially as a "no-style character." 153 // destStart points to the point after the style context 154 // character, so it is tempStart+1 or tempStart+2. 155 int tempStart = text.length(); // start of temp buffer 156 int destStart = tempStart; // copy new text to here 157 if (start > 0) { 158 int len = UTF16.getCharCount(text.char32At(start-1)); 159 text.copy(start-len, start, tempStart); 160 destStart += len; 161 } else { 162 text.replace(tempStart, tempStart, "\uFFFF"); 163 destStart++; 164 } 165 int destLimit = destStart; 166 int tempExtra = 0; // temp chars after destLimit 167 168 for (oOutput=0; oOutput<output.length(); ) { 169 if (oOutput == cursorPos) { 170 // Record the position of the cursor 171 newStart = buf.length() + destLimit - destStart; // relative to start 172 // the buf.length() was inserted for bug 5789 173 // the problem is that if we are accumulating into a buffer (when r == null below) 174 // then the actual length of the text at that point needs to add the buf length. 175 // there was an alternative suggested in #5789, but that looks like it won't work 176 // if we have accumulated some stuff in the dest part AND have a non-zero buffer. 177 } 178 int c = UTF16.charAt(output, oOutput); 179 180 // When we are at the last position copy the right style 181 // context character into the temporary buffer. We don't 182 // do this before because it will provide an incorrect 183 // right context for previous replace() operations. 184 int nextIndex = oOutput + UTF16.getCharCount(c); 185 if (nextIndex == output.length()) { 186 tempExtra = UTF16.getCharCount(text.char32At(limit)); 187 text.copy(limit, limit+tempExtra, destLimit); 188 } 189 190 UnicodeReplacer r = data.lookupReplacer(c); 191 if (r == null) { 192 // Accumulate straight (non-segment) text. 193 UTF16.append(buf, c); 194 } else { 195 isComplex = true; 196 197 // Insert any accumulated straight text. 198 if (buf.length() > 0) { 199 text.replace(destLimit, destLimit, buf.toString()); 200 destLimit += buf.length(); 201 buf.setLength(0); 202 } 203 204 // Delegate output generation to replacer object 205 int len = r.replace(text, destLimit, destLimit, cursor); 206 destLimit += len; 207 } 208 oOutput = nextIndex; 209 } 210 // Insert any accumulated straight text. 211 if (buf.length() > 0) { 212 text.replace(destLimit, destLimit, buf.toString()); 213 destLimit += buf.length(); 214 } 215 if (oOutput == cursorPos) { 216 // Record the position of the cursor 217 newStart = destLimit - destStart; // relative to start 218 } 219 220 outLen = destLimit - destStart; 221 222 // Copy new text to start, and delete it 223 text.copy(destStart, destLimit, start); 224 text.replace(tempStart + outLen, destLimit + tempExtra + outLen, ""); 225 226 // Delete the old text (the key) 227 text.replace(start + outLen, limit + outLen, ""); 228 } 229 230 if (hasCursor) { 231 // Adjust the cursor for positions outside the key. These 232 // refer to code points rather than code units. If cursorPos 233 // is within the output string, then use newStart, which has 234 // already been set above. 235 if (cursorPos < 0) { 236 newStart = start; 237 int n = cursorPos; 238 // Outside the output string, cursorPos counts code points 239 while (n < 0 && newStart > 0) { 240 newStart -= UTF16.getCharCount(text.char32At(newStart-1)); 241 ++n; 242 } 243 newStart += n; 244 } else if (cursorPos > output.length()) { 245 newStart = start + outLen; 246 int n = cursorPos - output.length(); 247 // Outside the output string, cursorPos counts code points 248 while (n > 0 && newStart < text.length()) { 249 newStart += UTF16.getCharCount(text.char32At(newStart)); 250 --n; 251 } 252 newStart += n; 253 } else { 254 // Cursor is within output string. It has been set up above 255 // to be relative to start. 256 newStart += start; 257 } 258 259 cursor[0] = newStart; 260 } 261 262 return outLen; 263 } 264 265 /** 266 * UnicodeReplacer API 267 */ 268 @Override 269 public String toReplacerPattern(boolean escapeUnprintable) { 270 StringBuffer rule = new StringBuffer(); 271 StringBuffer quoteBuf = new StringBuffer(); 272 273 int cursor = cursorPos; 274 275 // Handle a cursor preceding the output 276 if (hasCursor && cursor < 0) { 277 while (cursor++ < 0) { 278 Utility.appendToRule(rule, '@', true, escapeUnprintable, quoteBuf); 279 } 280 // Fall through and append '|' below 281 } 282 283 for (int i=0; i<output.length(); ++i) { 284 if (hasCursor && i == cursor) { 285 Utility.appendToRule(rule, '|', true, escapeUnprintable, quoteBuf); 286 } 287 char c = output.charAt(i); // Ok to use 16-bits here 288 289 UnicodeReplacer r = data.lookupReplacer(c); 290 if (r == null) { 291 Utility.appendToRule(rule, c, false, escapeUnprintable, quoteBuf); 292 } else { 293 StringBuffer buf = new StringBuffer(" "); 294 buf.append(r.toReplacerPattern(escapeUnprintable)); 295 buf.append(' '); 296 Utility.appendToRule(rule, buf.toString(), 297 true, escapeUnprintable, quoteBuf); 298 } 299 } 300 301 // Handle a cursor after the output. Use > rather than >= because 302 // if cursor == output.length() it is at the end of the output, 303 // which is the default position, so we need not emit it. 304 if (hasCursor && cursor > output.length()) { 305 cursor -= output.length(); 306 while (cursor-- > 0) { 307 Utility.appendToRule(rule, '@', true, escapeUnprintable, quoteBuf); 308 } 309 Utility.appendToRule(rule, '|', true, escapeUnprintable, quoteBuf); 310 } 311 // Flush quoteBuf out to result 312 Utility.appendToRule(rule, -1, 313 true, escapeUnprintable, quoteBuf); 314 315 return rule.toString(); 316 } 317 318 /** 319 * Union the set of all characters that may output by this object 320 * into the given set. 321 * @param toUnionTo the set into which to union the output characters 322 */ 323 @Override 324 public void addReplacementSetTo(UnicodeSet toUnionTo) { 325 int ch; 326 for (int i=0; i<output.length(); i+=UTF16.getCharCount(ch)) { 327 ch = UTF16.charAt(output, i); 328 UnicodeReplacer r = data.lookupReplacer(ch); 329 if (r == null) { 330 toUnionTo.add(ch); 331 } else { 332 r.addReplacementSetTo(toUnionTo); 333 } 334 } 335 } 336} 337 338//eof 339