1// © 2016 and later: Unicode, Inc. and others. 2// License & terms of use: http://www.unicode.org/copyright.html#License 3/* 4 ******************************************************************************* 5 * Copyright (C) 2011, Google, International Business Machines Corporation and 6 * others. All Rights Reserved. 7 ******************************************************************************* 8 */ 9package com.ibm.icu.dev.test.util; 10 11import java.util.ArrayList; 12import java.util.HashMap; 13import java.util.Iterator; 14import java.util.List; 15import java.util.Map; 16import java.util.Map.Entry; 17 18import com.ibm.icu.impl.Utility; 19import com.ibm.icu.util.BytesTrie; 20import com.ibm.icu.util.BytesTrie.Result; 21import com.ibm.icu.util.BytesTrieBuilder; 22import com.ibm.icu.util.CharsTrie; 23import com.ibm.icu.util.CharsTrieBuilder; 24import com.ibm.icu.util.StringTrieBuilder.Option; 25 26// would be nice to have a BytesTrieBuilder.add(aByte); 27// question: can bytetrie store <"",x>? 28// can you store the same string twice, eg add(bytes1, value), add(bytes1, value)? What happens? If an error, 29// should happen on add, not on build. 30// the BytesTrieBuilder.build should create a BytesTrie, not a raw array. For the latter, use buildArray or something. 31// need class description; examples of usage; which method can/should be called after which others. 32 33public abstract class TrieMap<V> implements Iterable<Entry<CharSequence, V>> { 34 35 public enum Style { 36 BYTES, CHARS 37 } 38 39 private static final boolean DEBUG = true; 40 41 protected final V[] intToValue; 42 protected final int size; 43 44 protected TrieMap(V[] intToValue, int size) { 45 this.intToValue = intToValue; 46 this.size = size; 47 } 48 49 public int keyByteSize() { 50 return size; 51 } 52 53 public static abstract class Builder<V> { 54 Option option; 55 protected List<V> intToValueTemp = new ArrayList<V>(); 56 protected Map<V, Integer> valueToIntegerTemp = new HashMap<V, Integer>(); 57 58 static public <K extends CharSequence, V> Builder<V> with(Style style, Map<K, V> keyValuePairs) { 59 return with(style, Option.SMALL, keyValuePairs); 60 } 61 62 static public <K extends CharSequence, V> Builder<V> with(Style style, Option option, Map<K, V> keyValuePairs) { 63 Builder<V> result = style == Style.BYTES ? new BytesTrieMap.BytesBuilder<V>() 64 : new CharsTrieMap.CharsBuilder<V>(); 65 result.option = option; 66 return result.addAll(keyValuePairs); 67 } 68 69 static public <K extends CharSequence, V> Builder<V> with(Style style, K key, V value) { 70 return with(style, Option.SMALL, key, value); 71 } 72 73 static public <K extends CharSequence, V> Builder<V> with(Style style, Option option, K key, V value) { 74 Builder<V> result = style == Style.BYTES ? new BytesTrieMap.BytesBuilder<V>() 75 : new CharsTrieMap.CharsBuilder<V>(); 76 result.option = option; 77 return result.add(key, value); 78 } 79 80 public abstract Builder<V> add(CharSequence key, V value); 81 82 public abstract <K extends CharSequence> Builder<V> addAll(Map<K, V> keyValuePairs); 83 84 public abstract TrieMap<V> build(); 85 } 86 87 abstract public V get(CharSequence test); 88 89 /** 90 * Warning: the entry contents are only valid until the next next() call!! 91 */ 92 abstract public Iterator<Entry<CharSequence, V>> iterator(); 93 94 abstract public Matcher<V> getMatcher(); 95 96 public abstract static class Matcher<V> { 97 protected CharSequence text = ""; 98 protected int start = 0; 99 protected int current = 0; 100 101 abstract void set(CharSequence string, int i); 102 103 abstract boolean next(); 104 105 abstract V getValue(); 106 107 abstract int getStart(); 108 109 abstract int getEnd(); 110 111 abstract boolean nextStart(); 112 } 113 114 private static class BytesTrieMap<V> extends TrieMap<V> { 115 private final BytesTrie bytesTrie; 116 private byte[] bytes = new byte[3]; 117 118 private BytesTrieMap(BytesTrie bytesTrie, V[] intToValue, int size) { 119 super(intToValue, size); 120 this.bytesTrie = bytesTrie; 121 } 122 123 public V get(CharSequence test) { 124 int length = test.length(); 125 bytesTrie.reset(); 126 if (length == 0) { 127 return bytesTrie.current().hasValue() ? intToValue[bytesTrie.getValue()] : null; 128 } 129 Result result = Result.NO_VALUE; 130 for (int i = 0; i < length; ++i) { 131 if (!result.hasNext()) { 132 return null; 133 } 134 char c = test.charAt(i); 135 int limit = ByteConverter.getBytes(c, bytes, 0); 136 result = limit == 1 ? bytesTrie.next(bytes[0]) : bytesTrie.next(bytes, 0, limit); 137 } 138 return result.hasValue() ? intToValue[bytesTrie.getValue()] : null; 139 140 // int length = test.length(); 141 // if (length == 0) { 142 // return null; 143 // } 144 // bytesTrie.reset(); 145 // Result result = null; 146 // byte[] bytes = new byte[3]; 147 // for (int i = 0; i < length; ++i) { 148 // char c = test.charAt(i); 149 // int limit = ByteConverter.getBytes(c, bytes, 0); 150 // for (int j = 0; j < limit; ++j) { 151 // result = bytesTrie.next(bytes[j]&0xFF); 152 // if (!result.matches()) { 153 // return null; 154 // } 155 // } 156 // } 157 // return result.hasValue() ? intToValue[bytesTrie.getValue()] : null; 158 } 159 160 public String toString() { 161 return toString(bytesTrie, " : ", "\n"); 162 } 163 164 /** 165 * Warning: the entry contents are only valid until the next next() call!! 166 */ 167 public Iterator<Entry<CharSequence, V>> iterator() { 168 // TODO Auto-generated method stub 169 return new BytesIterator(); 170 } 171 172 public TrieMap.Matcher<V> getMatcher() { 173 return new BytesMatcher(); 174 } 175 176 private class BytesIterator implements Iterator<Entry<CharSequence, V>> { 177 BytesTrie.Iterator iterator = bytesTrie.iterator(); 178 BytesEntry entry = new BytesEntry(); 179 180 public boolean hasNext() { 181 return iterator.hasNext(); 182 } 183 184 public Entry<CharSequence, V> next() { 185 entry.bytesEntry = iterator.next(); 186 return entry; 187 } 188 189 public void remove() { 190 throw new UnsupportedOperationException(); 191 } 192 } 193 194 private class BytesEntry implements Entry<CharSequence, V> { 195 public BytesTrie.Entry bytesEntry; 196 StringBuilder buffer = new StringBuilder(); 197 198 public CharSequence getKey() { 199 buffer.setLength(0); 200 ByteConverter.getChars(bytesEntry, buffer); 201 return buffer; 202 } 203 204 public V getValue() { 205 return intToValue[bytesEntry.value]; 206 } 207 208 public V setValue(V value) { 209 throw new UnsupportedOperationException(); 210 } 211 } 212 213 public class BytesMatcher extends Matcher<V> { 214 private byte[] bytes = new byte[3]; 215 private V value = null; 216 217 public void set(CharSequence text, int start) { 218 this.text = text; 219 this.start = start; 220 this.current = start; 221 } 222 223 public int getStart() { 224 return start; 225 } 226 227 public int getEnd() { 228 return current; 229 } 230 231 /** 232 * Finds the next match. Returns false when there are no possible further matches from the current start 233 * point. Once that happens, call nextStart(); Call getValue to get the current value. 234 * 235 * @return false when done. There may be a value, however. 236 */ 237 public boolean next() { 238 while (current < text.length()) { 239 char c = text.charAt(current++); 240 int limit = ByteConverter.getBytes(c, bytes, 0); 241 for (int j = 0; j < limit; ++j) { 242 Result result = bytesTrie.next(bytes[j]); 243 if (result.hasValue()) { 244 if (j < limit - 1) { 245 throw new IllegalArgumentException("Data corrupt"); 246 } 247 value = intToValue[bytesTrie.getValue()]; 248 return result.hasNext(); 249 } else if (!result.matches()) { 250 value = null; 251 return false; 252 } 253 } 254 } 255 value = null; 256 return false; 257 } 258 259 public boolean nextStart() { 260 if (start >= text.length()) { 261 return false; 262 } 263 ++start; 264 current = start; 265 bytesTrie.reset(); 266 return true; 267 } 268 269 public V getValue() { 270 return value; 271 } 272 } 273 } 274 275 public static class BytesBuilder<V> extends Builder<V> { 276 BytesTrieBuilder builder = new BytesTrieBuilder(); 277 byte[] bytes = new byte[200]; 278 List<String> debugBytes = DEBUG ? new ArrayList<String>() : null; 279 280 public BytesBuilder<V> add(CharSequence key, V value) { 281 // traverse the values, and get a mapping of a byte string to list of 282 // integers, and a mapping from those integers to a set of values 283 Integer index; 284 if (option == Option.SMALL) { 285 index = valueToIntegerTemp.get(value); 286 if (index == null) { 287 index = intToValueTemp.size(); 288 intToValueTemp.add(value); 289 valueToIntegerTemp.put(value, index); 290 } 291 } else { 292 index = intToValueTemp.size(); 293 intToValueTemp.add(value); 294 } 295 // dumb implementation for now 296 // the buffer size is at most 3 * number_of_chars 297 if (bytes.length < key.length() * 3) { 298 bytes = new byte[64 + key.length() * 3]; 299 } 300 int limit = 0; 301 for (int i = 0; i < key.length(); ++i) { 302 char c = key.charAt(i); 303 limit = ByteConverter.getBytes(c, bytes, limit); 304 } 305 try { 306 builder.add(bytes, limit, index); 307 return this; 308 } catch (Exception e) { 309 ArrayList<String> list = new ArrayList<String>(); 310 for (int i = 0; i < limit; ++i) { 311 list.add(Utility.hex(bytes[i])); 312 } 313 throw new IllegalArgumentException("Failed to add " + value + ", " + key + "=" + list, e); 314 } 315 } 316 317 public <K extends CharSequence> BytesBuilder<V> addAll(Map<K, V> keyValuePairs) { 318 for (Entry<K, V> entry : keyValuePairs.entrySet()) { 319 add(entry.getKey(), entry.getValue()); 320 } 321 return this; 322 } 323 324 public TrieMap<V> build() { 325 int size = builder.buildByteBuffer(option).remaining(); 326 BytesTrie bytesTrie = builder.build(option); 327 @SuppressWarnings("unchecked") 328 V[] intToValueArray = intToValueTemp.toArray((V[]) (new Object[intToValueTemp.size()])); 329 return new BytesTrieMap<V>(bytesTrie, intToValueArray, size); 330 } 331 } 332 333 private static class CharsTrieMap<V> extends TrieMap<V> { 334 private final CharsTrie charsTrie; 335 336 private CharsTrieMap(CharsTrie charsTrie, V[] intToValue, int size) { 337 super(intToValue, size); 338 this.charsTrie = charsTrie; 339 } 340 341 public V get(CharSequence test) { 342 Result result = charsTrie.reset().next(test, 0, test.length()); 343 return result.hasValue() ? intToValue[charsTrie.getValue()] : null; 344 } 345 346 public String toString() { 347 return toString(charsTrie, " : ", "\n"); 348 } 349 350 /** 351 * Warning: the entry contents are only valid until the next next() call!! 352 */ 353 public Iterator<Entry<CharSequence, V>> iterator() { 354 // TODO Auto-generated method stub 355 return new CharsIterator(); 356 } 357 358 public TrieMap.Matcher<V> getMatcher() { 359 return new CharsMatcher(); 360 } 361 362 private class CharsIterator implements Iterator<Entry<CharSequence, V>> { 363 CharsTrie.Iterator iterator = charsTrie.iterator(); 364 CharsEntry entry = new CharsEntry(); 365 366 public boolean hasNext() { 367 return iterator.hasNext(); 368 } 369 370 public Entry<CharSequence, V> next() { 371 entry.charsEntry = iterator.next(); 372 return entry; 373 } 374 375 public void remove() { 376 throw new UnsupportedOperationException(); 377 } 378 379 } 380 381 private class CharsEntry implements Entry<CharSequence, V> { 382 public CharsTrie.Entry charsEntry; 383 384 public CharSequence getKey() { 385 return charsEntry.chars; 386 } 387 388 public V getValue() { 389 return intToValue[charsEntry.value]; 390 } 391 392 public V setValue(V value) { 393 throw new UnsupportedOperationException(); 394 } 395 } 396 397 public class CharsMatcher extends Matcher<V> { 398 private V value = null; 399 400 public void set(CharSequence text, int start) { 401 this.text = text; 402 this.start = start; 403 this.current = start; 404 } 405 406 public int getStart() { 407 return start; 408 } 409 410 public int getEnd() { 411 return current; 412 } 413 414 /** 415 * Finds the next match. Returns false when there are no possible further matches from the current start 416 * point. Once that happens, call nextStart(); Call getValue to get the current value. 417 * 418 * @return false when done. There may be a value, however. 419 */ 420 public boolean next() { 421 while (current < text.length()) { 422 char c = text.charAt(current++); 423 Result result = charsTrie.next(c); 424 if (result.hasValue()) { 425 value = intToValue[charsTrie.getValue()]; 426 return result.hasNext(); 427 } else if (!result.matches()) { 428 value = null; 429 return false; 430 431 } 432 } 433 value = null; 434 return false; 435 } 436 437 public boolean nextStart() { 438 if (start >= text.length()) { 439 return false; 440 } 441 ++start; 442 current = start; 443 charsTrie.reset(); 444 return true; 445 } 446 447 public V getValue() { 448 return value; 449 } 450 } 451 } 452 453 public static class CharsBuilder<V> extends Builder<V> { 454 CharsTrieBuilder builder = new CharsTrieBuilder(); 455 456 public CharsBuilder<V> add(CharSequence key, V value) { 457 // traverse the values, and get a mapping of a byte string to list of 458 // integers, and a mapping from those integers to a set of values 459 Integer index; 460 if (option == Option.SMALL) { 461 index = valueToIntegerTemp.get(value); 462 if (index == null) { 463 index = intToValueTemp.size(); 464 intToValueTemp.add(value); 465 valueToIntegerTemp.put(value, index); 466 } 467 } else { 468 index = intToValueTemp.size(); 469 intToValueTemp.add(value); 470 } 471 try { 472 builder.add(key.toString(), index); 473 return this; 474 } catch (Exception e) { 475 throw new IllegalArgumentException("Failed to add " + value + ", " + key + "=" + Utility.hex(key), e); 476 } 477 } 478 479 public <K extends CharSequence> CharsBuilder<V> addAll(Map<K, V> keyValuePairs) { 480 for (Entry<K, V> entry : keyValuePairs.entrySet()) { 481 add(entry.getKey(), entry.getValue()); 482 } 483 return this; 484 } 485 486 public TrieMap<V> build() { 487 CharSequence buildCharSequence = builder.buildCharSequence(option); 488 int size = 2 * buildCharSequence.length(); 489 // CharsTrie charsTrie = builder.build(option); 490 CharsTrie charsTrie = new CharsTrie(buildCharSequence, 0); 491 @SuppressWarnings("unchecked") 492 V[] intToValueArray = intToValueTemp.toArray((V[]) (new Object[intToValueTemp.size()])); 493 return new CharsTrieMap<V>(charsTrie, intToValueArray, size); 494 } 495 } 496 497 /** 498 * Supports the following format for encoding chars (Unicode 16-bit code units). The format is slightly simpler and 499 * more compact than UTF8, but also maintains ordering. It is not, however self-synchronizing, and is not intended 500 * for general usage 501 * 502 * <pre> 503 * 0000..007F - 0xxx xxxx 504 * 0000..7EFF - 1yyy yyyy xxxx xxxx 505 * 7F00..FFFF - 1111 1111 yyyy yyyy xxxx xxxx 506 * </pre> 507 */ 508 static class ByteConverter { 509 public static int getBytes(char source, byte[] bytes, int limit) { 510 if (source < 0x80) { 511 bytes[limit++] = (byte) source; 512 } else if (source < 0x7F00) { 513 bytes[limit++] = (byte) (0x80 | (source >> 8)); 514 bytes[limit++] = (byte) source; 515 } else { 516 bytes[limit++] = (byte) -1; 517 bytes[limit++] = (byte) (source >> 8); 518 bytes[limit++] = (byte) source; 519 } 520 return limit; 521 } 522 523 /** 524 * Transform the string into a sequence of bytes, appending them after start, and return the new limit. 525 */ 526 public static int getBytes(CharSequence source, byte[] bytes, int limit) { 527 for (int i = 0; i < source.length(); ++i) { 528 limit = getBytes(source.charAt(i), bytes, limit); 529 } 530 return limit; 531 } 532 533 /** 534 * Transform a sequence of bytes into a string, according to the format in getBytes. No error checking. 535 */ 536 public static String getChars(byte[] bytes, int start, int limit) { 537 StringBuilder buffer = new StringBuilder(); 538 char[] output = new char[1]; 539 for (int i = start; i < limit;) { 540 i = getChar(bytes, i, output); 541 buffer.append(output[0]); 542 } 543 return buffer.toString(); 544 } 545 546 public static int getChar(byte[] bytes, int start, char[] output) { 547 byte b = bytes[start++]; 548 if (b >= 0) { 549 output[0] = (char) b; 550 } else if (b != (byte) -1) { // 2 bytes 551 int b1 = 0x7F & b; 552 int b2 = 0xFF & bytes[start++]; 553 output[0] = (char) ((b1 << 8) | b2); 554 } else { 555 int b2 = bytes[start++]; 556 int b3 = 0xFF & bytes[start++]; 557 output[0] = (char) ((b2 << 8) | b3); 558 } 559 return start; 560 } 561 562 private static void getChars(BytesTrie.Entry entry, StringBuilder stringBuilder) { 563 int len = entry.bytesLength(); 564 for (int i = 0; i < len;) { 565 byte b = entry.byteAt(i++); 566 if (b >= 0) { 567 stringBuilder.append((char) b); 568 } else if (b != (byte) -1) { // 2 bytes 569 int b1 = 0x7F & b; 570 int b2 = 0xFF & entry.byteAt(i++); 571 stringBuilder.append((char) ((b1 << 8) | b2)); 572 } else { 573 int b2 = entry.byteAt(i++); 574 int b3 = 0xFF & entry.byteAt(i++); 575 stringBuilder.append((char) ((b2 << 8) | b3)); 576 } 577 } 578 } 579 } 580 581 public static String toString(BytesTrie bytesTrie2) { 582 return toString(bytesTrie2, " : ", "\n"); 583 } 584 585 public static String toString(BytesTrie bytesTrie2, String keyValueSeparator, String itemSeparator) { 586 StringBuilder buffer = new StringBuilder(); 587 BytesTrie.Iterator iterator = bytesTrie2.iterator(); 588 while (iterator.hasNext()) { 589 BytesTrie.Entry bytesEntry = iterator.next(); 590 int len = bytesEntry.bytesLength(); 591 byte[] bytes = new byte[len]; 592 bytesEntry.copyBytesTo(bytes, 0); 593 buffer.append(Utility.hex(bytes, 0, len, " ")) 594 .append(keyValueSeparator) 595 .append(bytesEntry.value) 596 .append(itemSeparator); 597 } 598 return buffer.toString(); 599 } 600 601 public static String toString(CharsTrie bytesTrie2, String keyValueSeparator, String itemSeparator) { 602 StringBuilder buffer = new StringBuilder(); 603 CharsTrie.Iterator iterator = bytesTrie2.iterator(); 604 while (iterator.hasNext()) { 605 CharsTrie.Entry bytesEntry = iterator.next(); 606 buffer.append(Utility.hex(bytesEntry.chars)) 607 .append(keyValueSeparator) 608 .append(bytesEntry.value) 609 .append(itemSeparator); 610 } 611 return buffer.toString(); 612 } 613 614} 615