/*
 *******************************************************************************
 * Copyright (C) 2014, International Business Machines Corporation and         *
 * others. All Rights Reserved.                                                *
 *******************************************************************************
 */
package com.ibm.icu.impl;

import java.util.ArrayList;
import java.util.List;

/**
 * Compiled version of a pattern such as "{1} was born in {0}".
 * <p>
 * Using SimplePatternFormatter objects is both faster and safer than adhoc replacement
 * such as <code>pattern.replace("{0}", "Colorado").replace("{1}", "Fred");</code>.
 * They are faster because they are precompiled; they are safer because they
 * account for curly braces escaped by apostrophe (').
 * <p>
 * Placeholders are of the form \{[0-9]+\}. If a curly brace is preceded
 * by a single quote, it becomes a curly brace instead of the start of a
 * placeholder. Two single quotes resolve to one single quote. An opening curly
 * brace that does not begin a well-formed placeholder is kept as literal text,
 * together with the characters that follow it.
 * <p>
 * SimplePatternFormatter objects are immutable and can be safely cached like strings.
 * <p>
 * Example:
 * <pre>
 * SimplePatternFormatter fmt = SimplePatternFormatter.compile("{1} '{born} in {0}");
 *
 * // Output: "paul {born} in england"
 * System.out.println(fmt.format("england", "paul"));
 * </pre>
 */
public class SimplePatternFormatter {
    private final String patternWithoutPlaceholders;
    private final int placeholderCount;

    // [0] first offset; [1] first placeholderId; [2] second offset;
    // [3] second placeholderId etc. Offsets index into patternWithoutPlaceholders.
    private final int[] placeholderIdsOrderedByOffset;

    private final boolean firstPlaceholderReused;

    private SimplePatternFormatter(String pattern, PlaceholdersBuilder builder) {
        this.patternWithoutPlaceholders = pattern;
        this.placeholderIdsOrderedByOffset =
                builder.getPlaceholderIdsOrderedByOffset();
        this.placeholderCount = builder.getPlaceholderCount();
        this.firstPlaceholderReused = builder.getFirstPlaceholderReused();
    }

    /**
     * Compiles a string.
     *
     * @param pattern The string.
     * @return the new SimplePatternFormatter object.
     * @throws IllegalArgumentException if a placeholder ID is too large to be
     *         represented (greater than Integer.MAX_VALUE - 1).
     */
    public static SimplePatternFormatter compile(String pattern) {
        PlaceholdersBuilder placeholdersBuilder = new PlaceholdersBuilder();
        PlaceholderIdBuilder idBuilder = new PlaceholderIdBuilder();
        StringBuilder newPattern = new StringBuilder();
        State state = State.INIT;
        for (int i = 0; i < pattern.length(); i++) {
            char ch = pattern.charAt(i);
            switch (state) {
            case INIT:
                if (ch == '\'') {
                    state = State.APOSTROPHE;
                } else if (ch == '{') {
                    state = State.PLACEHOLDER;
                    idBuilder.reset();
                } else {
                    newPattern.append(ch);
                }
                break;
            case APOSTROPHE:
                if (ch == '\'') {
                    // Two apostrophes collapse into one.
                    newPattern.append('\'');
                } else if (ch == '{') {
                    // Escaped curly brace: literal text, not a placeholder.
                    newPattern.append('{');
                } else {
                    // The apostrophe escaped nothing; keep it and the character as is.
                    newPattern.append('\'');
                    newPattern.append(ch);
                }
                state = State.INIT;
                break;
            case PLACEHOLDER:
                if (ch >= '0' && ch <= '9') {
                    idBuilder.add(ch);
                } else if (ch == '}' && idBuilder.isValid()) {
                    if (idBuilder.isTooLarge()) {
                        // Fail here rather than let the ID silently wrap around
                        // and refer to the wrong (or a negative) value index.
                        throw new IllegalArgumentException(
                                "Placeholder ID too large in pattern: " + pattern);
                    }
                    placeholdersBuilder.add(idBuilder.getId(), newPattern.length());
                    state = State.INIT;
                } else {
                    // Not a well-formed placeholder: emit what was consumed verbatim.
                    newPattern.append('{');
                    idBuilder.appendTo(newPattern);
                    newPattern.append(ch);
                    state = State.INIT;
                }
                break;
            default:
                throw new IllegalStateException();
            }
        }
        // Flush whatever an unterminated escape or placeholder left pending.
        switch (state) {
        case INIT:
            break;
        case APOSTROPHE:
            newPattern.append('\'');
            break;
        case PLACEHOLDER:
            newPattern.append('{');
            idBuilder.appendTo(newPattern);
            break;
        default:
            throw new IllegalStateException();
        }
        return new SimplePatternFormatter(newPattern.toString(), placeholdersBuilder);
    }

    /**
     * Returns the max placeholder ID + 1.
     */
    public int getPlaceholderCount() {
        return placeholderCount;
    }

    /**
     * Formats the given values.
     *
     * @param values the placeholder values.
     * @return the formatted string.
     * @throws IllegalArgumentException if fewer values than
     *         {@link #getPlaceholderCount()} are supplied.
     */
    public String format(CharSequence... values) {
        return formatAndAppend(new StringBuilder(), null, values).toString();
    }

    /**
     * Formats the given values.
     *
     * @param appendTo the result appended here.
     * @param offsets position of first value in appendTo stored in offsets[0];
     *   second in offsets[1]; third in offsets[2] etc. An offset of -1 means that the
     *   corresponding value is not in appendTo. offsets.length and values.length may
     *   differ. If {@code offsets.length < values.length} then only the first offsets
     *   are written out; if {@code offsets.length > values.length} then the extra
     *   offsets get -1.
     *   If caller is not interested in offsets, caller may pass null here.
     * @param values the placeholder values. A placeholder value may not be the same object as
     *   appendTo.
     * @return appendTo
     * @throws IllegalArgumentException if too few values are supplied or if one of
     *   the values is the same object as appendTo.
     */
    public StringBuilder formatAndAppend(
            StringBuilder appendTo, int[] offsets, CharSequence... values) {
        if (values.length < placeholderCount) {
            throw new IllegalArgumentException("Too few values.");
        }
        PlaceholderValues placeholderValues = new PlaceholderValues(values);
        if (placeholderValues.isAppendToInAnyIndexExcept(appendTo, -1)) {
            throw new IllegalArgumentException("Parameter values cannot be the same as appendTo.");
        }
        formatReturningOffsetLength(appendTo, offsets, placeholderValues);
        return appendTo;
    }

    /**
     * Formats the given values.
     *
     * @param result The result is stored here overwriting any previously stored value.
     * @param offsets position of first value in result stored in offsets[0];
     *   second in offsets[1]; third in offsets[2] etc. An offset of -1 means that the
     *   corresponding value is not in result. offsets.length and values.length may
     *   differ. If {@code offsets.length < values.length} then only the first offsets
     *   are written out; if {@code offsets.length > values.length} then the extra
     *   offsets get -1.
     *   If caller is not interested in offsets, caller may pass null here.
     * @param values the placeholder values. A placeholder value may be result itself in which case
     *   the previous value of result is used.
     * @return result
     * @throws IllegalArgumentException if too few values are supplied.
     */
    public StringBuilder formatAndReplace(
            StringBuilder result, int[] offsets, CharSequence... values) {
        if (values.length < placeholderCount) {
            throw new IllegalArgumentException("Too few values.");
        }
        PlaceholderValues placeholderValues = new PlaceholderValues(values);
        int placeholderAtStart = getUniquePlaceholderAtStart();

        // If the pattern starts with a placeholder whose value is result, and result
        // is not the value of any other placeholder, the old content of result is
        // already in the right place and the rest can simply be appended to it.
        if (placeholderAtStart >= 0
                && values[placeholderAtStart] == result
                && !placeholderValues.isAppendToInAnyIndexExcept(result, placeholderAtStart)) {
            int offsetLength = formatReturningOffsetLength(result, offsets, placeholderValues);

            // The offset recorded for the leading placeholder is the length of the
            // previous content of result; its value really starts at 0.
            if (offsetLength > placeholderAtStart) {
                offsets[placeholderAtStart] = 0;
            }
            return result;
        }

        // General case: result is about to be cleared, so take a copy of its current
        // content first if any placeholder value refers to it.
        if (placeholderValues.isAppendToInAnyIndexExcept(result, -1)) {
            placeholderValues.snapshotAppendTo(result);
        }
        result.setLength(0);
        formatReturningOffsetLength(result, offsets, placeholderValues);
        return result;
    }

    /**
     * Formats this object using values {0}, {1} etc. Note that this is
     * not the same as the original pattern string used to build this object.
     */
    @Override
    public String toString() {
        String[] values = new String[this.getPlaceholderCount()];
        for (int i = 0; i < values.length; i++) {
            // Plain concatenation always yields ASCII digits; String.format("%d")
            // would localize them according to the default locale.
            values[i] = "{" + i + "}";
        }
        return formatAndAppend(new StringBuilder(), null, values).toString();
    }

    /**
     * Returns this pattern with none of the placeholders.
     */
    public String getPatternWithNoPlaceholders() {
        return patternWithoutPlaceholders;
    }

    /**
     * Just like format, but uses placeholder values exactly as they are.
     * A placeholder value that is the same object as appendTo is treated
     * as the empty string. In addition, returns the length of the offsets
     * array. Returns 0 if offsets is null.
     */
    private int formatReturningOffsetLength(
            StringBuilder appendTo,
            int[] offsets,
            PlaceholderValues values) {
        int offsetLen = offsets == null ? 0 : offsets.length;
        for (int i = 0; i < offsetLen; i++) {
            offsets[i] = -1;
        }
        // Alternate between a run of literal text and the placeholder that follows it.
        int textStart = 0;
        for (int i = 0; i < placeholderIdsOrderedByOffset.length; i += 2) {
            int textEnd = placeholderIdsOrderedByOffset[i];
            int placeholderId = placeholderIdsOrderedByOffset[i + 1];
            appendTo.append(patternWithoutPlaceholders, textStart, textEnd);
            setPlaceholderOffset(placeholderId, appendTo.length(), offsets, offsetLen);
            CharSequence placeholderValue = values.get(placeholderId);
            if (placeholderValue != appendTo) {
                appendTo.append(placeholderValue);
            }
            textStart = textEnd;
        }
        // Literal text after the last placeholder (the whole pattern if there are none).
        appendTo.append(
                patternWithoutPlaceholders, textStart, patternWithoutPlaceholders.length());
        return offsetLen;
    }

    /**
     * Returns the placeholder at the beginning of this pattern (e.g 3 for placeholder {3}).
     * Returns -1 if the beginning of pattern is text or if the placeholder at beginning
     * of this pattern is used again elsewhere in pattern.
     */
    private int getUniquePlaceholderAtStart() {
        if (placeholderIdsOrderedByOffset.length == 0
                || firstPlaceholderReused || placeholderIdsOrderedByOffset[0] != 0) {
            return -1;
        }
        return placeholderIdsOrderedByOffset[1];
    }

    /**
     * Records offset for placeholderId if the offsets array has a slot for it.
     */
    private static void setPlaceholderOffset(
            int placeholderId, int offset, int[] offsets, int offsetLen) {
        if (placeholderId < offsetLen) {
            offsets[placeholderId] = offset;
        }
    }

    /** Parser states used by compile. */
    private enum State {
        /** Reading ordinary text. */
        INIT,
        /** An apostrophe was just read. */
        APOSTROPHE,
        /** An opening curly brace was read; digits of a placeholder ID may follow. */
        PLACEHOLDER
    }

    /**
     * Accumulates the digits that follow an opening curly brace.
     */
    private static class PlaceholderIdBuilder {
        // Largest usable ID; placeholder count is ID + 1 and must still fit in an int.
        private static final int MAX_ID = Integer.MAX_VALUE - 1;

        // Digits exactly as they appear in the pattern, so that text which turns out
        // not to be a placeholder can be reproduced verbatim (leading zeros included).
        private final StringBuilder digits = new StringBuilder();
        private long id = 0;
        private boolean tooLarge = false;

        public void reset() {
            digits.setLength(0);
            id = 0;
            tooLarge = false;
        }

        /** Returns the numeric ID; only meaningful if isValid() and !isTooLarge(). */
        public int getId() {
            return (int) id;
        }

        /** Appends the digits read so far, unchanged, to appendTo. */
        public void appendTo(StringBuilder appendTo) {
            appendTo.append(digits);
        }

        /** Returns true if at least one digit has been added. */
        public boolean isValid() {
            return digits.length() > 0;
        }

        /** Returns true if the digits denote a value greater than MAX_ID. */
        public boolean isTooLarge() {
            return tooLarge;
        }

        public void add(char ch) {
            digits.append(ch);
            if (!tooLarge) {
                // id <= MAX_ID here, so this cannot overflow a long.
                id = id * 10 + (ch - '0');
                tooLarge = id > MAX_ID;
            }
        }
    }

    /**
     * Collects (offset, placeholderId) pairs in the order they occur in the pattern.
     */
    private static class PlaceholdersBuilder {
        private final List<Integer> placeholderIdsOrderedByOffset = new ArrayList<Integer>();
        private int placeholderCount = 0;
        private boolean firstPlaceholderReused = false;

        public void add(int placeholderId, int offset) {
            placeholderIdsOrderedByOffset.add(offset);
            placeholderIdsOrderedByOffset.add(placeholderId);
            if (placeholderId >= placeholderCount) {
                placeholderCount = placeholderId + 1;
            }
            // Element [1] is the ID of the first placeholder; a later placeholder
            // with the same ID means the first one is reused.
            int len = placeholderIdsOrderedByOffset.size();
            if (len > 2
                    && placeholderIdsOrderedByOffset.get(len - 1)
                            .equals(placeholderIdsOrderedByOffset.get(1))) {
                firstPlaceholderReused = true;
            }
        }

        public int getPlaceholderCount() {
            return placeholderCount;
        }

        public int[] getPlaceholderIdsOrderedByOffset() {
            int[] result = new int[placeholderIdsOrderedByOffset.size()];
            for (int i = 0; i < result.length; i++) {
                result[i] = placeholderIdsOrderedByOffset.get(i).intValue();
            }
            return result;
        }

        public boolean getFirstPlaceholderReused() {
            return firstPlaceholderReused;
        }
    }

    /**
     * Represents placeholder values.
     */
    private static class PlaceholderValues {
        private final CharSequence[] values;
        private CharSequence appendTo;
        private String appendToCopy;

        public PlaceholderValues(CharSequence... values) {
            this.values = values;
            this.appendTo = null;
            this.appendToCopy = null;
        }

        /**
         * Returns true if appendTo value is at any index besides exceptIndex.
         */
        public boolean isAppendToInAnyIndexExcept(CharSequence appendTo, int exceptIndex) {
            for (int i = 0; i < values.length; ++i) {
                if (i != exceptIndex && values[i] == appendTo) {
                    return true;
                }
            }
            return false;
        }

        /**
         * Takes a snapshot of the current content of appendTo. From now on, get
         * returns that snapshot for every value that is the appendTo object.
         */
        public void snapshotAppendTo(CharSequence appendTo) {
            this.appendTo = appendTo;
            this.appendToCopy = appendTo.toString();
        }

        /**
         * Return placeholder at given index.
         */
        public CharSequence get(int index) {
            if (appendTo == null || appendTo != values[index]) {
                return values[index];
            }
            return appendToCopy;
        }
    }

}