TrieMap.java revision aefe4d1f8f1773ead1a52f7a5d2c9e0009353600
1/* GENERATED SOURCE. DO NOT MODIFY. */ 2/* 3 ******************************************************************************* 4 * Copyright (C) 2011, Google, International Business Machines Corporation and 5 * others. All Rights Reserved. 6 ******************************************************************************* 7 */ 8package android.icu.dev.test.util; 9 10import java.util.ArrayList; 11import java.util.HashMap; 12import java.util.Iterator; 13import java.util.List; 14import java.util.Map; 15import java.util.Map.Entry; 16 17import android.icu.impl.Utility; 18import android.icu.util.BytesTrie; 19import android.icu.util.BytesTrie.Result; 20import android.icu.util.BytesTrieBuilder; 21import android.icu.util.CharsTrie; 22import android.icu.util.CharsTrieBuilder; 23import android.icu.util.StringTrieBuilder.Option; 24 25// would be nice to have a BytesTrieBuilder.add(aByte); 26// question: can bytetrie store <"",x>? 27// can you store the same string twice, eg add(bytes1, value), add(bytes1, value)? What happens? If an error, 28// should happen on add, not on build. 29// the BytesTrieBuilder.build should create a BytesTrie, not a raw array. For the latter, use buildArray or something. 30// need class description; examples of usage; which method can/should be called after which others. 31 32public abstract class TrieMap<V> implements Iterable<Entry<CharSequence, V>> { 33 34 public enum Style { 35 BYTES, CHARS 36 } 37 38 private static final boolean DEBUG = true; 39 40 protected final V[] intToValue; 41 protected final int size; 42 43 protected TrieMap(V[] intToValue, int size) { 44 this.intToValue = intToValue; 45 this.size = size; 46 } 47 48 public int keyByteSize() { 49 return size; 50 } 51 52 public static abstract class Builder<V> { 53 Option option; 54 protected List<V> intToValueTemp = new ArrayList<V>(); 55 protected Map<V, Integer> valueToIntegerTemp = new HashMap<V, Integer>(); 56 57 static public <K extends CharSequence, V> Builder<V> with(Style style, Map<K, V> keyValuePairs) { 58 return with(style, Option.SMALL, keyValuePairs); 59 } 60 61 static public <K extends CharSequence, V> Builder<V> with(Style style, Option option, Map<K, V> keyValuePairs) { 62 Builder<V> result = style == Style.BYTES ? new BytesTrieMap.BytesBuilder<V>() 63 : new CharsTrieMap.CharsBuilder<V>(); 64 result.option = option; 65 return result.addAll(keyValuePairs); 66 } 67 68 static public <K extends CharSequence, V> Builder<V> with(Style style, K key, V value) { 69 return with(style, Option.SMALL, key, value); 70 } 71 72 static public <K extends CharSequence, V> Builder<V> with(Style style, Option option, K key, V value) { 73 Builder<V> result = style == Style.BYTES ? new BytesTrieMap.BytesBuilder<V>() 74 : new CharsTrieMap.CharsBuilder<V>(); 75 result.option = option; 76 return result.add(key, value); 77 } 78 79 public abstract Builder<V> add(CharSequence key, V value); 80 81 public abstract <K extends CharSequence> Builder<V> addAll(Map<K, V> keyValuePairs); 82 83 public abstract TrieMap<V> build(); 84 } 85 86 abstract public V get(CharSequence test); 87 88 /** 89 * Warning: the entry contents are only valid until the next next() call!! 90 */ 91 abstract public Iterator<Entry<CharSequence, V>> iterator(); 92 93 abstract public Matcher<V> getMatcher(); 94 95 public abstract static class Matcher<V> { 96 protected CharSequence text = ""; 97 protected int start = 0; 98 protected int current = 0; 99 100 abstract void set(CharSequence string, int i); 101 102 abstract boolean next(); 103 104 abstract V getValue(); 105 106 abstract int getStart(); 107 108 abstract int getEnd(); 109 110 abstract boolean nextStart(); 111 } 112 113 private static class BytesTrieMap<V> extends TrieMap<V> { 114 private final BytesTrie bytesTrie; 115 private byte[] bytes = new byte[3]; 116 117 private BytesTrieMap(BytesTrie bytesTrie, V[] intToValue, int size) { 118 super(intToValue, size); 119 this.bytesTrie = bytesTrie; 120 } 121 122 public V get(CharSequence test) { 123 int length = test.length(); 124 bytesTrie.reset(); 125 if (length == 0) { 126 return bytesTrie.current().hasValue() ? intToValue[bytesTrie.getValue()] : null; 127 } 128 Result result = Result.NO_VALUE; 129 for (int i = 0; i < length; ++i) { 130 if (!result.hasNext()) { 131 return null; 132 } 133 char c = test.charAt(i); 134 int limit = ByteConverter.getBytes(c, bytes, 0); 135 result = limit == 1 ? bytesTrie.next(bytes[0]) : bytesTrie.next(bytes, 0, limit); 136 } 137 return result.hasValue() ? intToValue[bytesTrie.getValue()] : null; 138 139 // int length = test.length(); 140 // if (length == 0) { 141 // return null; 142 // } 143 // bytesTrie.reset(); 144 // Result result = null; 145 // byte[] bytes = new byte[3]; 146 // for (int i = 0; i < length; ++i) { 147 // char c = test.charAt(i); 148 // int limit = ByteConverter.getBytes(c, bytes, 0); 149 // for (int j = 0; j < limit; ++j) { 150 // result = bytesTrie.next(bytes[j]&0xFF); 151 // if (!result.matches()) { 152 // return null; 153 // } 154 // } 155 // } 156 // return result.hasValue() ? intToValue[bytesTrie.getValue()] : null; 157 } 158 159 public String toString() { 160 return toString(bytesTrie, " : ", "\n"); 161 } 162 163 /** 164 * Warning: the entry contents are only valid until the next next() call!! 165 */ 166 public Iterator<Entry<CharSequence, V>> iterator() { 167 // TODO Auto-generated method stub 168 return new BytesIterator(); 169 } 170 171 public TrieMap.Matcher<V> getMatcher() { 172 return new BytesMatcher(); 173 } 174 175 private class BytesIterator implements Iterator<Entry<CharSequence, V>> { 176 BytesTrie.Iterator iterator = bytesTrie.iterator(); 177 BytesEntry entry = new BytesEntry(); 178 179 public boolean hasNext() { 180 return iterator.hasNext(); 181 } 182 183 public Entry<CharSequence, V> next() { 184 entry.bytesEntry = iterator.next(); 185 return entry; 186 } 187 188 public void remove() { 189 throw new UnsupportedOperationException(); 190 } 191 } 192 193 private class BytesEntry implements Entry<CharSequence, V> { 194 public BytesTrie.Entry bytesEntry; 195 StringBuilder buffer = new StringBuilder(); 196 197 public CharSequence getKey() { 198 buffer.setLength(0); 199 ByteConverter.getChars(bytesEntry, buffer); 200 return buffer; 201 } 202 203 public V getValue() { 204 return intToValue[bytesEntry.value]; 205 } 206 207 public V setValue(V value) { 208 throw new UnsupportedOperationException(); 209 } 210 } 211 212 public class BytesMatcher extends Matcher<V> { 213 private byte[] bytes = new byte[3]; 214 private V value = null; 215 216 public void set(CharSequence text, int start) { 217 this.text = text; 218 this.start = start; 219 this.current = start; 220 } 221 222 public int getStart() { 223 return start; 224 } 225 226 public int getEnd() { 227 return current; 228 } 229 230 /** 231 * Finds the next match. Returns false when there are no possible further matches from the current start 232 * point. Once that happens, call nextStart(); Call getValue to get the current value. 233 * 234 * @return false when done. There may be a value, however. 235 */ 236 public boolean next() { 237 while (current < text.length()) { 238 char c = text.charAt(current++); 239 int limit = ByteConverter.getBytes(c, bytes, 0); 240 for (int j = 0; j < limit; ++j) { 241 Result result = bytesTrie.next(bytes[j]); 242 if (result.hasValue()) { 243 if (j < limit - 1) { 244 throw new IllegalArgumentException("Data corrupt"); 245 } 246 value = intToValue[bytesTrie.getValue()]; 247 return result.hasNext(); 248 } else if (!result.matches()) { 249 value = null; 250 return false; 251 } 252 } 253 } 254 value = null; 255 return false; 256 } 257 258 public boolean nextStart() { 259 if (start >= text.length()) { 260 return false; 261 } 262 ++start; 263 current = start; 264 bytesTrie.reset(); 265 return true; 266 } 267 268 public V getValue() { 269 return value; 270 } 271 } 272 } 273 274 public static class BytesBuilder<V> extends Builder<V> { 275 BytesTrieBuilder builder = new BytesTrieBuilder(); 276 byte[] bytes = new byte[200]; 277 List<String> debugBytes = DEBUG ? new ArrayList<String>() : null; 278 279 public BytesBuilder<V> add(CharSequence key, V value) { 280 // traverse the values, and get a mapping of a byte string to list of 281 // integers, and a mapping from those integers to a set of values 282 Integer index; 283 if (option == Option.SMALL) { 284 index = valueToIntegerTemp.get(value); 285 if (index == null) { 286 index = intToValueTemp.size(); 287 intToValueTemp.add(value); 288 valueToIntegerTemp.put(value, index); 289 } 290 } else { 291 index = intToValueTemp.size(); 292 intToValueTemp.add(value); 293 } 294 // dumb implementation for now 295 // the buffer size is at most 3 * number_of_chars 296 if (bytes.length < key.length() * 3) { 297 bytes = new byte[64 + key.length() * 3]; 298 } 299 int limit = 0; 300 for (int i = 0; i < key.length(); ++i) { 301 char c = key.charAt(i); 302 limit = ByteConverter.getBytes(c, bytes, limit); 303 } 304 try { 305 builder.add(bytes, limit, index); 306 return this; 307 } catch (Exception e) { 308 ArrayList<String> list = new ArrayList<String>(); 309 for (int i = 0; i < limit; ++i) { 310 list.add(Utility.hex(bytes[i])); 311 } 312 throw new IllegalArgumentException("Failed to add " + value + ", " + key + "=" + list, e); 313 } 314 } 315 316 public <K extends CharSequence> BytesBuilder<V> addAll(Map<K, V> keyValuePairs) { 317 for (Entry<K, V> entry : keyValuePairs.entrySet()) { 318 add(entry.getKey(), entry.getValue()); 319 } 320 return this; 321 } 322 323 public TrieMap<V> build() { 324 int size = builder.buildByteBuffer(option).remaining(); 325 BytesTrie bytesTrie = builder.build(option); 326 @SuppressWarnings("unchecked") 327 V[] intToValueArray = intToValueTemp.toArray((V[]) (new Object[intToValueTemp.size()])); 328 return new BytesTrieMap<V>(bytesTrie, intToValueArray, size); 329 } 330 } 331 332 private static class CharsTrieMap<V> extends TrieMap<V> { 333 private final CharsTrie charsTrie; 334 335 private CharsTrieMap(CharsTrie charsTrie, V[] intToValue, int size) { 336 super(intToValue, size); 337 this.charsTrie = charsTrie; 338 } 339 340 public V get(CharSequence test) { 341 Result result = charsTrie.reset().next(test, 0, test.length()); 342 return result.hasValue() ? intToValue[charsTrie.getValue()] : null; 343 } 344 345 public String toString() { 346 return toString(charsTrie, " : ", "\n"); 347 } 348 349 /** 350 * Warning: the entry contents are only valid until the next next() call!! 351 */ 352 public Iterator<Entry<CharSequence, V>> iterator() { 353 // TODO Auto-generated method stub 354 return new CharsIterator(); 355 } 356 357 public TrieMap.Matcher<V> getMatcher() { 358 return new CharsMatcher(); 359 } 360 361 private class CharsIterator implements Iterator<Entry<CharSequence, V>> { 362 CharsTrie.Iterator iterator = charsTrie.iterator(); 363 CharsEntry entry = new CharsEntry(); 364 365 public boolean hasNext() { 366 return iterator.hasNext(); 367 } 368 369 public Entry<CharSequence, V> next() { 370 entry.charsEntry = iterator.next(); 371 return entry; 372 } 373 374 public void remove() { 375 throw new UnsupportedOperationException(); 376 } 377 378 } 379 380 private class CharsEntry implements Entry<CharSequence, V> { 381 public CharsTrie.Entry charsEntry; 382 383 public CharSequence getKey() { 384 return charsEntry.chars; 385 } 386 387 public V getValue() { 388 return intToValue[charsEntry.value]; 389 } 390 391 public V setValue(V value) { 392 throw new UnsupportedOperationException(); 393 } 394 } 395 396 public class CharsMatcher extends Matcher<V> { 397 private V value = null; 398 399 public void set(CharSequence text, int start) { 400 this.text = text; 401 this.start = start; 402 this.current = start; 403 } 404 405 public int getStart() { 406 return start; 407 } 408 409 public int getEnd() { 410 return current; 411 } 412 413 /** 414 * Finds the next match. Returns false when there are no possible further matches from the current start 415 * point. Once that happens, call nextStart(); Call getValue to get the current value. 416 * 417 * @return false when done. There may be a value, however. 418 */ 419 public boolean next() { 420 while (current < text.length()) { 421 char c = text.charAt(current++); 422 Result result = charsTrie.next(c); 423 if (result.hasValue()) { 424 value = intToValue[charsTrie.getValue()]; 425 return result.hasNext(); 426 } else if (!result.matches()) { 427 value = null; 428 return false; 429 430 } 431 } 432 value = null; 433 return false; 434 } 435 436 public boolean nextStart() { 437 if (start >= text.length()) { 438 return false; 439 } 440 ++start; 441 current = start; 442 charsTrie.reset(); 443 return true; 444 } 445 446 public V getValue() { 447 return value; 448 } 449 } 450 } 451 452 public static class CharsBuilder<V> extends Builder<V> { 453 CharsTrieBuilder builder = new CharsTrieBuilder(); 454 455 public CharsBuilder<V> add(CharSequence key, V value) { 456 // traverse the values, and get a mapping of a byte string to list of 457 // integers, and a mapping from those integers to a set of values 458 Integer index; 459 if (option == Option.SMALL) { 460 index = valueToIntegerTemp.get(value); 461 if (index == null) { 462 index = intToValueTemp.size(); 463 intToValueTemp.add(value); 464 valueToIntegerTemp.put(value, index); 465 } 466 } else { 467 index = intToValueTemp.size(); 468 intToValueTemp.add(value); 469 } 470 try { 471 builder.add(key.toString(), index); 472 return this; 473 } catch (Exception e) { 474 throw new IllegalArgumentException("Failed to add " + value + ", " + key + "=" + Utility.hex(key), e); 475 } 476 } 477 478 public <K extends CharSequence> CharsBuilder<V> addAll(Map<K, V> keyValuePairs) { 479 for (Entry<K, V> entry : keyValuePairs.entrySet()) { 480 add(entry.getKey(), entry.getValue()); 481 } 482 return this; 483 } 484 485 public TrieMap<V> build() { 486 CharSequence buildCharSequence = builder.buildCharSequence(option); 487 int size = 2 * buildCharSequence.length(); 488 // CharsTrie charsTrie = builder.build(option); 489 CharsTrie charsTrie = new CharsTrie(buildCharSequence, 0); 490 @SuppressWarnings("unchecked") 491 V[] intToValueArray = intToValueTemp.toArray((V[]) (new Object[intToValueTemp.size()])); 492 return new CharsTrieMap<V>(charsTrie, intToValueArray, size); 493 } 494 } 495 496 /** 497 * Supports the following format for encoding chars (Unicode 16-bit code units). The format is slightly simpler and 498 * more compact than UTF8, but also maintains ordering. It is not, however self-synchronizing, and is not intended 499 * for general usage 500 * 501 * <pre> 502 * 0000..007F - 0xxx xxxx 503 * 0000..7EFF - 1yyy yyyy xxxx xxxx 504 * 7F00..FFFF - 1111 1111 yyyy yyyy xxxx xxxx 505 * </pre> 506 */ 507 static class ByteConverter { 508 public static int getBytes(char source, byte[] bytes, int limit) { 509 if (source < 0x80) { 510 bytes[limit++] = (byte) source; 511 } else if (source < 0x7F00) { 512 bytes[limit++] = (byte) (0x80 | (source >> 8)); 513 bytes[limit++] = (byte) source; 514 } else { 515 bytes[limit++] = (byte) -1; 516 bytes[limit++] = (byte) (source >> 8); 517 bytes[limit++] = (byte) source; 518 } 519 return limit; 520 } 521 522 /** 523 * Transform the string into a sequence of bytes, appending them after start, and return the new limit. 524 */ 525 public static int getBytes(CharSequence source, byte[] bytes, int limit) { 526 for (int i = 0; i < source.length(); ++i) { 527 limit = getBytes(source.charAt(i), bytes, limit); 528 } 529 return limit; 530 } 531 532 /** 533 * Transform a sequence of bytes into a string, according to the format in getBytes. No error checking. 534 */ 535 public static String getChars(byte[] bytes, int start, int limit) { 536 StringBuilder buffer = new StringBuilder(); 537 char[] output = new char[1]; 538 for (int i = start; i < limit;) { 539 i = getChar(bytes, i, output); 540 buffer.append(output[0]); 541 } 542 return buffer.toString(); 543 } 544 545 public static int getChar(byte[] bytes, int start, char[] output) { 546 byte b = bytes[start++]; 547 if (b >= 0) { 548 output[0] = (char) b; 549 } else if (b != (byte) -1) { // 2 bytes 550 int b1 = 0x7F & b; 551 int b2 = 0xFF & bytes[start++]; 552 output[0] = (char) ((b1 << 8) | b2); 553 } else { 554 int b2 = bytes[start++]; 555 int b3 = 0xFF & bytes[start++]; 556 output[0] = (char) ((b2 << 8) | b3); 557 } 558 return start; 559 } 560 561 private static void getChars(BytesTrie.Entry entry, StringBuilder stringBuilder) { 562 int len = entry.bytesLength(); 563 for (int i = 0; i < len;) { 564 byte b = entry.byteAt(i++); 565 if (b >= 0) { 566 stringBuilder.append((char) b); 567 } else if (b != (byte) -1) { // 2 bytes 568 int b1 = 0x7F & b; 569 int b2 = 0xFF & entry.byteAt(i++); 570 stringBuilder.append((char) ((b1 << 8) | b2)); 571 } else { 572 int b2 = entry.byteAt(i++); 573 int b3 = 0xFF & entry.byteAt(i++); 574 stringBuilder.append((char) ((b2 << 8) | b3)); 575 } 576 } 577 } 578 } 579 580 public static String toString(BytesTrie bytesTrie2) { 581 return toString(bytesTrie2, " : ", "\n"); 582 } 583 584 public static String toString(BytesTrie bytesTrie2, String keyValueSeparator, String itemSeparator) { 585 StringBuilder buffer = new StringBuilder(); 586 BytesTrie.Iterator iterator = bytesTrie2.iterator(); 587 while (iterator.hasNext()) { 588 BytesTrie.Entry bytesEntry = iterator.next(); 589 int len = bytesEntry.bytesLength(); 590 byte[] bytes = new byte[len]; 591 bytesEntry.copyBytesTo(bytes, 0); 592 buffer.append(Utility.hex(bytes, 0, len, " ")) 593 .append(keyValueSeparator) 594 .append(bytesEntry.value) 595 .append(itemSeparator); 596 } 597 return buffer.toString(); 598 } 599 600 public static String toString(CharsTrie bytesTrie2, String keyValueSeparator, String itemSeparator) { 601 StringBuilder buffer = new StringBuilder(); 602 CharsTrie.Iterator iterator = bytesTrie2.iterator(); 603 while (iterator.hasNext()) { 604 CharsTrie.Entry bytesEntry = iterator.next(); 605 buffer.append(Utility.hex(bytesEntry.chars)) 606 .append(keyValueSeparator) 607 .append(bytesEntry.value) 608 .append(itemSeparator); 609 } 610 return buffer.toString(); 611 } 612 613} 614