1/* 2********************************************************************** 3* Copyright (c) 2002-2007, International Business Machines Corporation 4* and others. All Rights Reserved. 5********************************************************************** 6* Date Name Description 7* 01/14/2002 aliu Creation. 8********************************************************************** 9*/ 10 11package com.ibm.icu.text; 12import com.ibm.icu.impl.Utility; 13 14/** 15 * A replacer that produces static text as its output. The text may 16 * contain transliterator stand-in characters that represent nested 17 * UnicodeReplacer objects, making it possible to encode a tree of 18 * replacers in a StringReplacer. A StringReplacer that contains such 19 * stand-ins is called a <em>complex</em> StringReplacer. A complex 20 * StringReplacer has a slower processing loop than a non-complex one. 21 * @author Alan Liu 22 */ 23class StringReplacer implements UnicodeReplacer { 24 25 /** 26 * Output text, possibly containing stand-in characters that 27 * represent nested UnicodeReplacers. 28 */ 29 private String output; 30 31 /** 32 * Cursor position. Value is ignored if hasCursor is false. 33 */ 34 private int cursorPos; 35 36 /** 37 * True if this object outputs a cursor position. 38 */ 39 private boolean hasCursor; 40 41 /** 42 * A complex object contains nested replacers and requires more 43 * complex processing. StringReplacers are initially assumed to 44 * be complex. If no nested replacers are seen during processing, 45 * then isComplex is set to false, and future replacements are 46 * short circuited for better performance. 47 */ 48 private boolean isComplex; 49 50 /** 51 * Object that translates stand-in characters in 'output' to 52 * UnicodeReplacer objects. 53 */ 54 private final RuleBasedTransliterator.Data data; 55 56 /** 57 * Construct a StringReplacer that sets the emits the given output 58 * text and sets the cursor to the given position. 59 * @param theOutput text that will replace input text when the 60 * replace() method is called. May contain stand-in characters 61 * that represent nested replacers. 62 * @param theCursorPos cursor position that will be returned by 63 * the replace() method 64 * @param theData transliterator context object that translates 65 * stand-in characters to UnicodeReplacer objects 66 */ 67 public StringReplacer(String theOutput, 68 int theCursorPos, 69 RuleBasedTransliterator.Data theData) { 70 output = theOutput; 71 cursorPos = theCursorPos; 72 hasCursor = true; 73 data = theData; 74 isComplex = true; 75 } 76 77 /** 78 * Construct a StringReplacer that sets the emits the given output 79 * text and does not modify the cursor. 80 * @param theOutput text that will replace input text when the 81 * replace() method is called. May contain stand-in characters 82 * that represent nested replacers. 83 * @param theData transliterator context object that translates 84 * stand-in characters to UnicodeReplacer objects 85 */ 86 public StringReplacer(String theOutput, 87 RuleBasedTransliterator.Data theData) { 88 output = theOutput; 89 cursorPos = 0; 90 hasCursor = false; 91 data = theData; 92 isComplex = true; 93 } 94 95//= public static UnicodeReplacer valueOf(String output, 96//= int cursorPos, 97//= RuleBasedTransliterator.Data data) { 98//= if (output.length() == 1) { 99//= char c = output.charAt(0); 100//= UnicodeReplacer r = data.lookupReplacer(c); 101//= if (r != null) { 102//= return r; 103//= } 104//= } 105//= return new StringReplacer(output, cursorPos, data); 106//= } 107 108 /** 109 * UnicodeReplacer API 110 */ 111 public int replace(Replaceable text, 112 int start, 113 int limit, 114 int[] cursor) { 115 int outLen; 116 int newStart = 0; 117 118 // NOTE: It should be possible to _always_ run the complex 119 // processing code; just slower. If not, then there is a bug 120 // in the complex processing code. 121 122 // Simple (no nested replacers) Processing Code : 123 if (!isComplex) { 124 text.replace(start, limit, output); 125 outLen = output.length(); 126 127 // Setup default cursor position (for cursorPos within output) 128 newStart = cursorPos; 129 } 130 131 // Complex (nested replacers) Processing Code : 132 else { 133 /* When there are segments to be copied, use the Replaceable.copy() 134 * API in order to retain out-of-band data. Copy everything to the 135 * end of the string, then copy them back over the key. This preserves 136 * the integrity of indices into the key and surrounding context while 137 * generating the output text. 138 */ 139 StringBuffer buf = new StringBuffer(); 140 int oOutput; // offset into 'output' 141 isComplex = false; 142 143 // The temporary buffer starts at tempStart, and extends 144 // to destLimit + tempExtra. The start of the buffer has a single 145 // character from before the key. This provides style 146 // data when addition characters are filled into the 147 // temporary buffer. If there is nothing to the left, use 148 // the non-character U+FFFF, which Replaceable subclasses 149 // should treat specially as a "no-style character." 150 // destStart points to the point after the style context 151 // character, so it is tempStart+1 or tempStart+2. 152 int tempStart = text.length(); // start of temp buffer 153 int destStart = tempStart; // copy new text to here 154 if (start > 0) { 155 int len = UTF16.getCharCount(text.char32At(start-1)); 156 text.copy(start-len, start, tempStart); 157 destStart += len; 158 } else { 159 text.replace(tempStart, tempStart, "\uFFFF"); 160 destStart++; 161 } 162 int destLimit = destStart; 163 int tempExtra = 0; // temp chars after destLimit 164 165 for (oOutput=0; oOutput<output.length(); ) { 166 if (oOutput == cursorPos) { 167 // Record the position of the cursor 168 newStart = buf.length() + destLimit - destStart; // relative to start 169 // the buf.length() was inserted for bug 5789 170 // the problem is that if we are accumulating into a buffer (when r == null below) 171 // then the actual length of the text at that point needs to add the buf length. 172 // there was an alternative suggested in #5789, but that looks like it won't work 173 // if we have accumulated some stuff in the dest part AND have a non-zero buffer. 174 } 175 int c = UTF16.charAt(output, oOutput); 176 177 // When we are at the last position copy the right style 178 // context character into the temporary buffer. We don't 179 // do this before because it will provide an incorrect 180 // right context for previous replace() operations. 181 int nextIndex = oOutput + UTF16.getCharCount(c); 182 if (nextIndex == output.length()) { 183 tempExtra = UTF16.getCharCount(text.char32At(limit)); 184 text.copy(limit, limit+tempExtra, destLimit); 185 } 186 187 UnicodeReplacer r = data.lookupReplacer(c); 188 if (r == null) { 189 // Accumulate straight (non-segment) text. 190 UTF16.append(buf, c); 191 } else { 192 isComplex = true; 193 194 // Insert any accumulated straight text. 195 if (buf.length() > 0) { 196 text.replace(destLimit, destLimit, buf.toString()); 197 destLimit += buf.length(); 198 buf.setLength(0); 199 } 200 201 // Delegate output generation to replacer object 202 int len = r.replace(text, destLimit, destLimit, cursor); 203 destLimit += len; 204 } 205 oOutput = nextIndex; 206 } 207 // Insert any accumulated straight text. 208 if (buf.length() > 0) { 209 text.replace(destLimit, destLimit, buf.toString()); 210 destLimit += buf.length(); 211 } 212 if (oOutput == cursorPos) { 213 // Record the position of the cursor 214 newStart = destLimit - destStart; // relative to start 215 } 216 217 outLen = destLimit - destStart; 218 219 // Copy new text to start, and delete it 220 text.copy(destStart, destLimit, start); 221 text.replace(tempStart + outLen, destLimit + tempExtra + outLen, ""); 222 223 // Delete the old text (the key) 224 text.replace(start + outLen, limit + outLen, ""); 225 } 226 227 if (hasCursor) { 228 // Adjust the cursor for positions outside the key. These 229 // refer to code points rather than code units. If cursorPos 230 // is within the output string, then use newStart, which has 231 // already been set above. 232 if (cursorPos < 0) { 233 newStart = start; 234 int n = cursorPos; 235 // Outside the output string, cursorPos counts code points 236 while (n < 0 && newStart > 0) { 237 newStart -= UTF16.getCharCount(text.char32At(newStart-1)); 238 ++n; 239 } 240 newStart += n; 241 } else if (cursorPos > output.length()) { 242 newStart = start + outLen; 243 int n = cursorPos - output.length(); 244 // Outside the output string, cursorPos counts code points 245 while (n > 0 && newStart < text.length()) { 246 newStart += UTF16.getCharCount(text.char32At(newStart)); 247 --n; 248 } 249 newStart += n; 250 } else { 251 // Cursor is within output string. It has been set up above 252 // to be relative to start. 253 newStart += start; 254 } 255 256 cursor[0] = newStart; 257 } 258 259 return outLen; 260 } 261 262 /** 263 * UnicodeReplacer API 264 */ 265 public String toReplacerPattern(boolean escapeUnprintable) { 266 StringBuffer rule = new StringBuffer(); 267 StringBuffer quoteBuf = new StringBuffer(); 268 269 int cursor = cursorPos; 270 271 // Handle a cursor preceding the output 272 if (hasCursor && cursor < 0) { 273 while (cursor++ < 0) { 274 Utility.appendToRule(rule, '@', true, escapeUnprintable, quoteBuf); 275 } 276 // Fall through and append '|' below 277 } 278 279 for (int i=0; i<output.length(); ++i) { 280 if (hasCursor && i == cursor) { 281 Utility.appendToRule(rule, '|', true, escapeUnprintable, quoteBuf); 282 } 283 char c = output.charAt(i); // Ok to use 16-bits here 284 285 UnicodeReplacer r = data.lookupReplacer(c); 286 if (r == null) { 287 Utility.appendToRule(rule, c, false, escapeUnprintable, quoteBuf); 288 } else { 289 StringBuffer buf = new StringBuffer(" "); 290 buf.append(r.toReplacerPattern(escapeUnprintable)); 291 buf.append(' '); 292 Utility.appendToRule(rule, buf.toString(), 293 true, escapeUnprintable, quoteBuf); 294 } 295 } 296 297 // Handle a cursor after the output. Use > rather than >= because 298 // if cursor == output.length() it is at the end of the output, 299 // which is the default position, so we need not emit it. 300 if (hasCursor && cursor > output.length()) { 301 cursor -= output.length(); 302 while (cursor-- > 0) { 303 Utility.appendToRule(rule, '@', true, escapeUnprintable, quoteBuf); 304 } 305 Utility.appendToRule(rule, '|', true, escapeUnprintable, quoteBuf); 306 } 307 // Flush quoteBuf out to result 308 Utility.appendToRule(rule, -1, 309 true, escapeUnprintable, quoteBuf); 310 311 return rule.toString(); 312 } 313 314 /** 315 * Union the set of all characters that may output by this object 316 * into the given set. 317 * @param toUnionTo the set into which to union the output characters 318 */ 319 public void addReplacementSetTo(UnicodeSet toUnionTo) { 320 int ch; 321 for (int i=0; i<output.length(); i+=UTF16.getCharCount(ch)) { 322 ch = UTF16.charAt(output, i); 323 UnicodeReplacer r = data.lookupReplacer(ch); 324 if (r == null) { 325 toUnionTo.add(ch); 326 } else { 327 r.addReplacementSetTo(toUnionTo); 328 } 329 } 330 } 331} 332 333//eof 334