1/* 2 ******************************************************************************* 3 * Copyright (C) 2011, Google, International Business Machines Corporation and 4 * others. All Rights Reserved. 5 ******************************************************************************* 6 */ 7package com.ibm.icu.dev.test.util; 8 9import java.util.ArrayList; 10import java.util.HashMap; 11import java.util.Iterator; 12import java.util.List; 13import java.util.Map; 14import java.util.Map.Entry; 15 16import com.ibm.icu.impl.Utility; 17import com.ibm.icu.util.BytesTrie; 18import com.ibm.icu.util.BytesTrie.Result; 19import com.ibm.icu.util.BytesTrieBuilder; 20import com.ibm.icu.util.CharsTrie; 21import com.ibm.icu.util.CharsTrieBuilder; 22import com.ibm.icu.util.StringTrieBuilder.Option; 23 24// would be nice to have a BytesTrieBuilder.add(aByte); 25// question: can bytetrie store <"",x>? 26// can you store the same string twice, eg add(bytes1, value), add(bytes1, value)? What happens? If an error, 27// should happen on add, not on build. 28// the BytesTrieBuilder.build should create a BytesTrie, not a raw array. For the latter, use buildArray or something. 29// need class description; examples of usage; which method can/should be called after which others. 30 31public abstract class TrieMap<V> implements Iterable<Entry<CharSequence, V>> { 32 33 public enum Style { 34 BYTES, CHARS 35 } 36 37 private static final boolean DEBUG = true; 38 39 protected final V[] intToValue; 40 protected final int size; 41 42 protected TrieMap(V[] intToValue, int size) { 43 this.intToValue = intToValue; 44 this.size = size; 45 } 46 47 public int keyByteSize() { 48 return size; 49 } 50 51 public static abstract class Builder<V> { 52 Option option; 53 protected List<V> intToValueTemp = new ArrayList<V>(); 54 protected Map<V, Integer> valueToIntegerTemp = new HashMap<V, Integer>(); 55 56 static public <K extends CharSequence, V> Builder<V> with(Style style, Map<K, V> keyValuePairs) { 57 return with(style, Option.SMALL, keyValuePairs); 58 } 59 60 static public <K extends CharSequence, V> Builder<V> with(Style style, Option option, Map<K, V> keyValuePairs) { 61 Builder<V> result = style == Style.BYTES ? new BytesTrieMap.BytesBuilder<V>() 62 : new CharsTrieMap.CharsBuilder<V>(); 63 result.option = option; 64 return result.addAll(keyValuePairs); 65 } 66 67 static public <K extends CharSequence, V> Builder<V> with(Style style, K key, V value) { 68 return with(style, Option.SMALL, key, value); 69 } 70 71 static public <K extends CharSequence, V> Builder<V> with(Style style, Option option, K key, V value) { 72 Builder<V> result = style == Style.BYTES ? new BytesTrieMap.BytesBuilder<V>() 73 : new CharsTrieMap.CharsBuilder<V>(); 74 result.option = option; 75 return result.add(key, value); 76 } 77 78 public abstract Builder<V> add(CharSequence key, V value); 79 80 public abstract <K extends CharSequence> Builder<V> addAll(Map<K, V> keyValuePairs); 81 82 public abstract TrieMap<V> build(); 83 } 84 85 abstract public V get(CharSequence test); 86 87 /** 88 * Warning: the entry contents are only valid until the next next() call!! 89 */ 90 abstract public Iterator<Entry<CharSequence, V>> iterator(); 91 92 abstract public Matcher<V> getMatcher(); 93 94 public abstract static class Matcher<V> { 95 protected CharSequence text = ""; 96 protected int start = 0; 97 protected int current = 0; 98 99 abstract void set(CharSequence string, int i); 100 101 abstract boolean next(); 102 103 abstract V getValue(); 104 105 abstract int getStart(); 106 107 abstract int getEnd(); 108 109 abstract boolean nextStart(); 110 } 111 112 private static class BytesTrieMap<V> extends TrieMap<V> { 113 private final BytesTrie bytesTrie; 114 private byte[] bytes = new byte[3]; 115 116 private BytesTrieMap(BytesTrie bytesTrie, V[] intToValue, int size) { 117 super(intToValue, size); 118 this.bytesTrie = bytesTrie; 119 } 120 121 public V get(CharSequence test) { 122 int length = test.length(); 123 bytesTrie.reset(); 124 if (length == 0) { 125 return bytesTrie.current().hasValue() ? intToValue[bytesTrie.getValue()] : null; 126 } 127 Result result = Result.NO_VALUE; 128 for (int i = 0; i < length; ++i) { 129 if (!result.hasNext()) { 130 return null; 131 } 132 char c = test.charAt(i); 133 int limit = ByteConverter.getBytes(c, bytes, 0); 134 result = limit == 1 ? bytesTrie.next(bytes[0]) : bytesTrie.next(bytes, 0, limit); 135 } 136 return result.hasValue() ? intToValue[bytesTrie.getValue()] : null; 137 138 // int length = test.length(); 139 // if (length == 0) { 140 // return null; 141 // } 142 // bytesTrie.reset(); 143 // Result result = null; 144 // byte[] bytes = new byte[3]; 145 // for (int i = 0; i < length; ++i) { 146 // char c = test.charAt(i); 147 // int limit = ByteConverter.getBytes(c, bytes, 0); 148 // for (int j = 0; j < limit; ++j) { 149 // result = bytesTrie.next(bytes[j]&0xFF); 150 // if (!result.matches()) { 151 // return null; 152 // } 153 // } 154 // } 155 // return result.hasValue() ? intToValue[bytesTrie.getValue()] : null; 156 } 157 158 public String toString() { 159 return toString(bytesTrie, " : ", "\n"); 160 } 161 162 /** 163 * Warning: the entry contents are only valid until the next next() call!! 164 */ 165 public Iterator<Entry<CharSequence, V>> iterator() { 166 // TODO Auto-generated method stub 167 return new BytesIterator(); 168 } 169 170 public TrieMap.Matcher<V> getMatcher() { 171 return new BytesMatcher(); 172 } 173 174 private class BytesIterator implements Iterator<Entry<CharSequence, V>> { 175 BytesTrie.Iterator iterator = bytesTrie.iterator(); 176 BytesEntry entry = new BytesEntry(); 177 178 public boolean hasNext() { 179 return iterator.hasNext(); 180 } 181 182 public Entry<CharSequence, V> next() { 183 entry.bytesEntry = iterator.next(); 184 return entry; 185 } 186 187 public void remove() { 188 throw new UnsupportedOperationException(); 189 } 190 } 191 192 private class BytesEntry implements Entry<CharSequence, V> { 193 public BytesTrie.Entry bytesEntry; 194 StringBuilder buffer = new StringBuilder(); 195 196 public CharSequence getKey() { 197 buffer.setLength(0); 198 ByteConverter.getChars(bytesEntry, buffer); 199 return buffer; 200 } 201 202 public V getValue() { 203 return intToValue[bytesEntry.value]; 204 } 205 206 public V setValue(V value) { 207 throw new UnsupportedOperationException(); 208 } 209 } 210 211 public class BytesMatcher extends Matcher<V> { 212 private byte[] bytes = new byte[3]; 213 private V value = null; 214 215 public void set(CharSequence text, int start) { 216 this.text = text; 217 this.start = start; 218 this.current = start; 219 } 220 221 public int getStart() { 222 return start; 223 } 224 225 public int getEnd() { 226 return current; 227 } 228 229 /** 230 * Finds the next match. Returns false when there are no possible further matches from the current start 231 * point. Once that happens, call nextStart(); Call getValue to get the current value. 232 * 233 * @return false when done. There may be a value, however. 234 */ 235 public boolean next() { 236 while (current < text.length()) { 237 char c = text.charAt(current++); 238 int limit = ByteConverter.getBytes(c, bytes, 0); 239 for (int j = 0; j < limit; ++j) { 240 Result result = bytesTrie.next(bytes[j]); 241 if (result.hasValue()) { 242 if (j < limit - 1) { 243 throw new IllegalArgumentException("Data corrupt"); 244 } 245 value = intToValue[bytesTrie.getValue()]; 246 return result.hasNext(); 247 } else if (!result.matches()) { 248 value = null; 249 return false; 250 } 251 } 252 } 253 value = null; 254 return false; 255 } 256 257 public boolean nextStart() { 258 if (start >= text.length()) { 259 return false; 260 } 261 ++start; 262 current = start; 263 bytesTrie.reset(); 264 return true; 265 } 266 267 public V getValue() { 268 return value; 269 } 270 } 271 } 272 273 public static class BytesBuilder<V> extends Builder<V> { 274 BytesTrieBuilder builder = new BytesTrieBuilder(); 275 byte[] bytes = new byte[200]; 276 List<String> debugBytes = DEBUG ? new ArrayList<String>() : null; 277 278 public BytesBuilder<V> add(CharSequence key, V value) { 279 // traverse the values, and get a mapping of a byte string to list of 280 // integers, and a mapping from those integers to a set of values 281 Integer index; 282 if (option == Option.SMALL) { 283 index = valueToIntegerTemp.get(value); 284 if (index == null) { 285 index = intToValueTemp.size(); 286 intToValueTemp.add(value); 287 valueToIntegerTemp.put(value, index); 288 } 289 } else { 290 index = intToValueTemp.size(); 291 intToValueTemp.add(value); 292 } 293 // dumb implementation for now 294 // the buffer size is at most 3 * number_of_chars 295 if (bytes.length < key.length() * 3) { 296 bytes = new byte[64 + key.length() * 3]; 297 } 298 int limit = 0; 299 for (int i = 0; i < key.length(); ++i) { 300 char c = key.charAt(i); 301 limit = ByteConverter.getBytes(c, bytes, limit); 302 } 303 try { 304 builder.add(bytes, limit, index); 305 return this; 306 } catch (Exception e) { 307 ArrayList<String> list = new ArrayList<String>(); 308 for (int i = 0; i < limit; ++i) { 309 list.add(Utility.hex(bytes[i])); 310 } 311 throw new IllegalArgumentException("Failed to add " + value + ", " + key + "=" + list, e); 312 } 313 } 314 315 public <K extends CharSequence> BytesBuilder<V> addAll(Map<K, V> keyValuePairs) { 316 for (Entry<K, V> entry : keyValuePairs.entrySet()) { 317 add(entry.getKey(), entry.getValue()); 318 } 319 return this; 320 } 321 322 public TrieMap<V> build() { 323 int size = builder.buildByteBuffer(option).remaining(); 324 BytesTrie bytesTrie = builder.build(option); 325 @SuppressWarnings("unchecked") 326 V[] intToValueArray = intToValueTemp.toArray((V[]) (new Object[intToValueTemp.size()])); 327 return new BytesTrieMap<V>(bytesTrie, intToValueArray, size); 328 } 329 } 330 331 private static class CharsTrieMap<V> extends TrieMap<V> { 332 private final CharsTrie charsTrie; 333 334 private CharsTrieMap(CharsTrie charsTrie, V[] intToValue, int size) { 335 super(intToValue, size); 336 this.charsTrie = charsTrie; 337 } 338 339 public V get(CharSequence test) { 340 Result result = charsTrie.reset().next(test, 0, test.length()); 341 return result.hasValue() ? intToValue[charsTrie.getValue()] : null; 342 } 343 344 public String toString() { 345 return toString(charsTrie, " : ", "\n"); 346 } 347 348 /** 349 * Warning: the entry contents are only valid until the next next() call!! 350 */ 351 public Iterator<Entry<CharSequence, V>> iterator() { 352 // TODO Auto-generated method stub 353 return new CharsIterator(); 354 } 355 356 public TrieMap.Matcher<V> getMatcher() { 357 return new CharsMatcher(); 358 } 359 360 private class CharsIterator implements Iterator<Entry<CharSequence, V>> { 361 CharsTrie.Iterator iterator = charsTrie.iterator(); 362 CharsEntry entry = new CharsEntry(); 363 364 public boolean hasNext() { 365 return iterator.hasNext(); 366 } 367 368 public Entry<CharSequence, V> next() { 369 entry.charsEntry = iterator.next(); 370 return entry; 371 } 372 373 public void remove() { 374 throw new UnsupportedOperationException(); 375 } 376 377 } 378 379 private class CharsEntry implements Entry<CharSequence, V> { 380 public CharsTrie.Entry charsEntry; 381 382 public CharSequence getKey() { 383 return charsEntry.chars; 384 } 385 386 public V getValue() { 387 return intToValue[charsEntry.value]; 388 } 389 390 public V setValue(V value) { 391 throw new UnsupportedOperationException(); 392 } 393 } 394 395 public class CharsMatcher extends Matcher<V> { 396 private V value = null; 397 398 public void set(CharSequence text, int start) { 399 this.text = text; 400 this.start = start; 401 this.current = start; 402 } 403 404 public int getStart() { 405 return start; 406 } 407 408 public int getEnd() { 409 return current; 410 } 411 412 /** 413 * Finds the next match. Returns false when there are no possible further matches from the current start 414 * point. Once that happens, call nextStart(); Call getValue to get the current value. 415 * 416 * @return false when done. There may be a value, however. 417 */ 418 public boolean next() { 419 while (current < text.length()) { 420 char c = text.charAt(current++); 421 Result result = charsTrie.next(c); 422 if (result.hasValue()) { 423 value = intToValue[charsTrie.getValue()]; 424 return result.hasNext(); 425 } else if (!result.matches()) { 426 value = null; 427 return false; 428 429 } 430 } 431 value = null; 432 return false; 433 } 434 435 public boolean nextStart() { 436 if (start >= text.length()) { 437 return false; 438 } 439 ++start; 440 current = start; 441 charsTrie.reset(); 442 return true; 443 } 444 445 public V getValue() { 446 return value; 447 } 448 } 449 } 450 451 public static class CharsBuilder<V> extends Builder<V> { 452 CharsTrieBuilder builder = new CharsTrieBuilder(); 453 454 public CharsBuilder<V> add(CharSequence key, V value) { 455 // traverse the values, and get a mapping of a byte string to list of 456 // integers, and a mapping from those integers to a set of values 457 Integer index; 458 if (option == Option.SMALL) { 459 index = valueToIntegerTemp.get(value); 460 if (index == null) { 461 index = intToValueTemp.size(); 462 intToValueTemp.add(value); 463 valueToIntegerTemp.put(value, index); 464 } 465 } else { 466 index = intToValueTemp.size(); 467 intToValueTemp.add(value); 468 } 469 try { 470 builder.add(key.toString(), index); 471 return this; 472 } catch (Exception e) { 473 throw new IllegalArgumentException("Failed to add " + value + ", " + key + "=" + Utility.hex(key), e); 474 } 475 } 476 477 public <K extends CharSequence> CharsBuilder<V> addAll(Map<K, V> keyValuePairs) { 478 for (Entry<K, V> entry : keyValuePairs.entrySet()) { 479 add(entry.getKey(), entry.getValue()); 480 } 481 return this; 482 } 483 484 public TrieMap<V> build() { 485 CharSequence buildCharSequence = builder.buildCharSequence(option); 486 int size = 2 * buildCharSequence.length(); 487 // CharsTrie charsTrie = builder.build(option); 488 CharsTrie charsTrie = new CharsTrie(buildCharSequence, 0); 489 @SuppressWarnings("unchecked") 490 V[] intToValueArray = intToValueTemp.toArray((V[]) (new Object[intToValueTemp.size()])); 491 return new CharsTrieMap<V>(charsTrie, intToValueArray, size); 492 } 493 } 494 495 /** 496 * Supports the following format for encoding chars (Unicode 16-bit code units). The format is slightly simpler and 497 * more compact than UTF8, but also maintains ordering. It is not, however self-synchronizing, and is not intended 498 * for general usage 499 * 500 * <pre> 501 * 0000..007F - 0xxx xxxx 502 * 0000..7EFF - 1yyy yyyy xxxx xxxx 503 * 7F00..FFFF - 1111 1111 yyyy yyyy xxxx xxxx 504 * </pre> 505 */ 506 static class ByteConverter { 507 public static int getBytes(char source, byte[] bytes, int limit) { 508 if (source < 0x80) { 509 bytes[limit++] = (byte) source; 510 } else if (source < 0x7F00) { 511 bytes[limit++] = (byte) (0x80 | (source >> 8)); 512 bytes[limit++] = (byte) source; 513 } else { 514 bytes[limit++] = (byte) -1; 515 bytes[limit++] = (byte) (source >> 8); 516 bytes[limit++] = (byte) source; 517 } 518 return limit; 519 } 520 521 /** 522 * Transform the string into a sequence of bytes, appending them after start, and return the new limit. 523 */ 524 public static int getBytes(CharSequence source, byte[] bytes, int limit) { 525 for (int i = 0; i < source.length(); ++i) { 526 limit = getBytes(source.charAt(i), bytes, limit); 527 } 528 return limit; 529 } 530 531 /** 532 * Transform a sequence of bytes into a string, according to the format in getBytes. No error checking. 533 */ 534 public static String getChars(byte[] bytes, int start, int limit) { 535 StringBuilder buffer = new StringBuilder(); 536 char[] output = new char[1]; 537 for (int i = start; i < limit;) { 538 i = getChar(bytes, i, output); 539 buffer.append(output[0]); 540 } 541 return buffer.toString(); 542 } 543 544 public static int getChar(byte[] bytes, int start, char[] output) { 545 byte b = bytes[start++]; 546 if (b >= 0) { 547 output[0] = (char) b; 548 } else if (b != (byte) -1) { // 2 bytes 549 int b1 = 0x7F & b; 550 int b2 = 0xFF & bytes[start++]; 551 output[0] = (char) ((b1 << 8) | b2); 552 } else { 553 int b2 = bytes[start++]; 554 int b3 = 0xFF & bytes[start++]; 555 output[0] = (char) ((b2 << 8) | b3); 556 } 557 return start; 558 } 559 560 private static void getChars(BytesTrie.Entry entry, StringBuilder stringBuilder) { 561 int len = entry.bytesLength(); 562 for (int i = 0; i < len;) { 563 byte b = entry.byteAt(i++); 564 if (b >= 0) { 565 stringBuilder.append((char) b); 566 } else if (b != (byte) -1) { // 2 bytes 567 int b1 = 0x7F & b; 568 int b2 = 0xFF & entry.byteAt(i++); 569 stringBuilder.append((char) ((b1 << 8) | b2)); 570 } else { 571 int b2 = entry.byteAt(i++); 572 int b3 = 0xFF & entry.byteAt(i++); 573 stringBuilder.append((char) ((b2 << 8) | b3)); 574 } 575 } 576 } 577 } 578 579 public static String toString(BytesTrie bytesTrie2) { 580 return toString(bytesTrie2, " : ", "\n"); 581 } 582 583 public static String toString(BytesTrie bytesTrie2, String keyValueSeparator, String itemSeparator) { 584 StringBuilder buffer = new StringBuilder(); 585 BytesTrie.Iterator iterator = bytesTrie2.iterator(); 586 while (iterator.hasNext()) { 587 BytesTrie.Entry bytesEntry = iterator.next(); 588 int len = bytesEntry.bytesLength(); 589 byte[] bytes = new byte[len]; 590 bytesEntry.copyBytesTo(bytes, 0); 591 buffer.append(Utility.hex(bytes, 0, len, " ")) 592 .append(keyValueSeparator) 593 .append(bytesEntry.value) 594 .append(itemSeparator); 595 } 596 return buffer.toString(); 597 } 598 599 public static String toString(CharsTrie bytesTrie2, String keyValueSeparator, String itemSeparator) { 600 StringBuilder buffer = new StringBuilder(); 601 CharsTrie.Iterator iterator = bytesTrie2.iterator(); 602 while (iterator.hasNext()) { 603 CharsTrie.Entry bytesEntry = iterator.next(); 604 buffer.append(Utility.hex(bytesEntry.chars)) 605 .append(keyValueSeparator) 606 .append(bytesEntry.value) 607 .append(itemSeparator); 608 } 609 return buffer.toString(); 610 } 611 612} 613