12d2bb24f747c65578da13d5b13b82f0669690461Fredrik Roubert// © 2016 and later: Unicode, Inc. and others. 22d2bb24f747c65578da13d5b13b82f0669690461Fredrik Roubert// License & terms of use: http://www.unicode.org/copyright.html#License 37935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert/* 47935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert ******************************************************************************* 57935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert * Copyright (C) 2011, Google, International Business Machines Corporation and 67935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert * others. All Rights Reserved. 77935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert ******************************************************************************* 87935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert */ 97935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubertpackage com.ibm.icu.dev.test.util; 107935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert 117935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubertimport java.util.ArrayList; 127935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubertimport java.util.HashMap; 137935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubertimport java.util.Iterator; 147935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubertimport java.util.List; 157935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubertimport java.util.Map; 167935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubertimport java.util.Map.Entry; 177935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert 187935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubertimport com.ibm.icu.impl.Utility; 197935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubertimport com.ibm.icu.util.BytesTrie; 207935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubertimport com.ibm.icu.util.BytesTrie.Result; 217935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubertimport com.ibm.icu.util.BytesTrieBuilder; 227935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubertimport com.ibm.icu.util.CharsTrie; 237935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubertimport com.ibm.icu.util.CharsTrieBuilder; 247935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubertimport com.ibm.icu.util.StringTrieBuilder.Option; 257935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert 267935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert// would be nice to have a BytesTrieBuilder.add(aByte); 277935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert// question: can bytetrie store <"",x>? 287935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert// can you store the same string twice, eg add(bytes1, value), add(bytes1, value)? What happens? If an error, 297935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert// should happen on add, not on build. 307935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert// the BytesTrieBuilder.build should create a BytesTrie, not a raw array. For the latter, use buildArray or something. 317935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert// need class description; examples of usage; which method can/should be called after which others. 327935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert 337935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubertpublic abstract class TrieMap<V> implements Iterable<Entry<CharSequence, V>> { 347935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert 357935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert public enum Style { 367935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert BYTES, CHARS 377935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert } 387935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert 397935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert private static final boolean DEBUG = true; 407935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert 417935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert protected final V[] intToValue; 427935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert protected final int size; 437935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert 447935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert protected TrieMap(V[] intToValue, int size) { 457935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert this.intToValue = intToValue; 467935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert this.size = size; 477935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert } 487935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert 497935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert public int keyByteSize() { 507935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert return size; 517935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert } 527935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert 537935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert public static abstract class Builder<V> { 547935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert Option option; 557935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert protected List<V> intToValueTemp = new ArrayList<V>(); 567935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert protected Map<V, Integer> valueToIntegerTemp = new HashMap<V, Integer>(); 577935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert 587935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert static public <K extends CharSequence, V> Builder<V> with(Style style, Map<K, V> keyValuePairs) { 597935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert return with(style, Option.SMALL, keyValuePairs); 607935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert } 617935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert 627935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert static public <K extends CharSequence, V> Builder<V> with(Style style, Option option, Map<K, V> keyValuePairs) { 637935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert Builder<V> result = style == Style.BYTES ? new BytesTrieMap.BytesBuilder<V>() 647935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert : new CharsTrieMap.CharsBuilder<V>(); 657935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert result.option = option; 667935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert return result.addAll(keyValuePairs); 677935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert } 687935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert 697935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert static public <K extends CharSequence, V> Builder<V> with(Style style, K key, V value) { 707935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert return with(style, Option.SMALL, key, value); 717935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert } 727935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert 737935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert static public <K extends CharSequence, V> Builder<V> with(Style style, Option option, K key, V value) { 747935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert Builder<V> result = style == Style.BYTES ? new BytesTrieMap.BytesBuilder<V>() 757935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert : new CharsTrieMap.CharsBuilder<V>(); 767935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert result.option = option; 777935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert return result.add(key, value); 787935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert } 797935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert 807935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert public abstract Builder<V> add(CharSequence key, V value); 817935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert 827935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert public abstract <K extends CharSequence> Builder<V> addAll(Map<K, V> keyValuePairs); 837935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert 847935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert public abstract TrieMap<V> build(); 857935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert } 867935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert 877935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert abstract public V get(CharSequence test); 887935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert 897935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert /** 907935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert * Warning: the entry contents are only valid until the next next() call!! 917935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert */ 927935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert abstract public Iterator<Entry<CharSequence, V>> iterator(); 937935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert 947935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert abstract public Matcher<V> getMatcher(); 957935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert 967935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert public abstract static class Matcher<V> { 977935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert protected CharSequence text = ""; 987935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert protected int start = 0; 997935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert protected int current = 0; 1007935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert 1017935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert abstract void set(CharSequence string, int i); 1027935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert 1037935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert abstract boolean next(); 1047935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert 1057935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert abstract V getValue(); 1067935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert 1077935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert abstract int getStart(); 1087935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert 1097935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert abstract int getEnd(); 1107935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert 1117935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert abstract boolean nextStart(); 1127935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert } 1137935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert 1147935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert private static class BytesTrieMap<V> extends TrieMap<V> { 1157935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert private final BytesTrie bytesTrie; 1167935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert private byte[] bytes = new byte[3]; 1177935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert 1187935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert private BytesTrieMap(BytesTrie bytesTrie, V[] intToValue, int size) { 1197935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert super(intToValue, size); 1207935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert this.bytesTrie = bytesTrie; 1217935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert } 1227935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert 1237935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert public V get(CharSequence test) { 1247935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert int length = test.length(); 1257935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert bytesTrie.reset(); 1267935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert if (length == 0) { 1277935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert return bytesTrie.current().hasValue() ? intToValue[bytesTrie.getValue()] : null; 1287935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert } 1297935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert Result result = Result.NO_VALUE; 1307935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert for (int i = 0; i < length; ++i) { 1317935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert if (!result.hasNext()) { 1327935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert return null; 1337935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert } 1347935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert char c = test.charAt(i); 1357935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert int limit = ByteConverter.getBytes(c, bytes, 0); 1367935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert result = limit == 1 ? bytesTrie.next(bytes[0]) : bytesTrie.next(bytes, 0, limit); 1377935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert } 1387935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert return result.hasValue() ? intToValue[bytesTrie.getValue()] : null; 1397935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert 1407935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert // int length = test.length(); 1417935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert // if (length == 0) { 1427935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert // return null; 1437935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert // } 1447935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert // bytesTrie.reset(); 1457935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert // Result result = null; 1467935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert // byte[] bytes = new byte[3]; 1477935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert // for (int i = 0; i < length; ++i) { 1487935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert // char c = test.charAt(i); 1497935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert // int limit = ByteConverter.getBytes(c, bytes, 0); 1507935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert // for (int j = 0; j < limit; ++j) { 1517935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert // result = bytesTrie.next(bytes[j]&0xFF); 1527935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert // if (!result.matches()) { 1537935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert // return null; 1547935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert // } 1557935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert // } 1567935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert // } 1577935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert // return result.hasValue() ? intToValue[bytesTrie.getValue()] : null; 1587935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert } 1597935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert 1607935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert public String toString() { 1617935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert return toString(bytesTrie, " : ", "\n"); 1627935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert } 1637935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert 1647935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert /** 1657935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert * Warning: the entry contents are only valid until the next next() call!! 1667935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert */ 1677935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert public Iterator<Entry<CharSequence, V>> iterator() { 1687935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert // TODO Auto-generated method stub 1697935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert return new BytesIterator(); 1707935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert } 1717935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert 1727935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert public TrieMap.Matcher<V> getMatcher() { 1737935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert return new BytesMatcher(); 1747935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert } 1757935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert 1767935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert private class BytesIterator implements Iterator<Entry<CharSequence, V>> { 1777935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert BytesTrie.Iterator iterator = bytesTrie.iterator(); 1787935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert BytesEntry entry = new BytesEntry(); 1797935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert 1807935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert public boolean hasNext() { 1817935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert return iterator.hasNext(); 1827935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert } 1837935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert 1847935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert public Entry<CharSequence, V> next() { 1857935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert entry.bytesEntry = iterator.next(); 1867935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert return entry; 1877935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert } 1887935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert 1897935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert public void remove() { 1907935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert throw new UnsupportedOperationException(); 1917935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert } 1927935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert } 1937935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert 1947935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert private class BytesEntry implements Entry<CharSequence, V> { 1957935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert public BytesTrie.Entry bytesEntry; 1967935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert StringBuilder buffer = new StringBuilder(); 1977935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert 1987935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert public CharSequence getKey() { 1997935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert buffer.setLength(0); 2007935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert ByteConverter.getChars(bytesEntry, buffer); 2017935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert return buffer; 2027935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert } 2037935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert 2047935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert public V getValue() { 2057935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert return intToValue[bytesEntry.value]; 2067935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert } 2077935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert 2087935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert public V setValue(V value) { 2097935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert throw new UnsupportedOperationException(); 2107935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert } 2117935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert } 2127935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert 2137935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert public class BytesMatcher extends Matcher<V> { 2147935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert private byte[] bytes = new byte[3]; 2157935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert private V value = null; 2167935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert 2177935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert public void set(CharSequence text, int start) { 2187935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert this.text = text; 2197935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert this.start = start; 2207935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert this.current = start; 2217935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert } 2227935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert 2237935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert public int getStart() { 2247935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert return start; 2257935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert } 2267935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert 2277935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert public int getEnd() { 2287935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert return current; 2297935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert } 2307935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert 2317935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert /** 2327935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert * Finds the next match. Returns false when there are no possible further matches from the current start 2337935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert * point. Once that happens, call nextStart(); Call getValue to get the current value. 2347935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert * 2357935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert * @return false when done. There may be a value, however. 2367935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert */ 2377935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert public boolean next() { 2387935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert while (current < text.length()) { 2397935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert char c = text.charAt(current++); 2407935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert int limit = ByteConverter.getBytes(c, bytes, 0); 2417935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert for (int j = 0; j < limit; ++j) { 2427935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert Result result = bytesTrie.next(bytes[j]); 2437935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert if (result.hasValue()) { 2447935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert if (j < limit - 1) { 2457935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert throw new IllegalArgumentException("Data corrupt"); 2467935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert } 2477935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert value = intToValue[bytesTrie.getValue()]; 2487935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert return result.hasNext(); 2497935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert } else if (!result.matches()) { 2507935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert value = null; 2517935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert return false; 2527935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert } 2537935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert } 2547935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert } 2557935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert value = null; 2567935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert return false; 2577935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert } 2587935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert 2597935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert public boolean nextStart() { 2607935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert if (start >= text.length()) { 2617935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert return false; 2627935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert } 2637935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert ++start; 2647935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert current = start; 2657935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert bytesTrie.reset(); 2667935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert return true; 2677935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert } 2687935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert 2697935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert public V getValue() { 2707935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert return value; 2717935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert } 2727935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert } 2737935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert } 2747935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert 2757935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert public static class BytesBuilder<V> extends Builder<V> { 2767935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert BytesTrieBuilder builder = new BytesTrieBuilder(); 2777935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert byte[] bytes = new byte[200]; 2787935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert List<String> debugBytes = DEBUG ? new ArrayList<String>() : null; 2797935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert 2807935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert public BytesBuilder<V> add(CharSequence key, V value) { 2817935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert // traverse the values, and get a mapping of a byte string to list of 2827935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert // integers, and a mapping from those integers to a set of values 2837935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert Integer index; 2847935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert if (option == Option.SMALL) { 2857935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert index = valueToIntegerTemp.get(value); 2867935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert if (index == null) { 2877935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert index = intToValueTemp.size(); 2887935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert intToValueTemp.add(value); 2897935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert valueToIntegerTemp.put(value, index); 2907935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert } 2917935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert } else { 2927935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert index = intToValueTemp.size(); 2937935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert intToValueTemp.add(value); 2947935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert } 2957935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert // dumb implementation for now 2967935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert // the buffer size is at most 3 * number_of_chars 2977935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert if (bytes.length < key.length() * 3) { 2987935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert bytes = new byte[64 + key.length() * 3]; 2997935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert } 3007935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert int limit = 0; 3017935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert for (int i = 0; i < key.length(); ++i) { 3027935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert char c = key.charAt(i); 3037935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert limit = ByteConverter.getBytes(c, bytes, limit); 3047935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert } 3057935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert try { 3067935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert builder.add(bytes, limit, index); 3077935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert return this; 3087935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert } catch (Exception e) { 3097935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert ArrayList<String> list = new ArrayList<String>(); 3107935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert for (int i = 0; i < limit; ++i) { 3117935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert list.add(Utility.hex(bytes[i])); 3127935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert } 3137935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert throw new IllegalArgumentException("Failed to add " + value + ", " + key + "=" + list, e); 3147935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert } 3157935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert } 3167935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert 3177935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert public <K extends CharSequence> BytesBuilder<V> addAll(Map<K, V> keyValuePairs) { 3187935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert for (Entry<K, V> entry : keyValuePairs.entrySet()) { 3197935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert add(entry.getKey(), entry.getValue()); 3207935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert } 3217935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert return this; 3227935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert } 3237935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert 3247935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert public TrieMap<V> build() { 3257935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert int size = builder.buildByteBuffer(option).remaining(); 3267935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert BytesTrie bytesTrie = builder.build(option); 3277935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert @SuppressWarnings("unchecked") 3287935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert V[] intToValueArray = intToValueTemp.toArray((V[]) (new Object[intToValueTemp.size()])); 3297935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert return new BytesTrieMap<V>(bytesTrie, intToValueArray, size); 3307935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert } 3317935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert } 3327935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert 3337935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert private static class CharsTrieMap<V> extends TrieMap<V> { 3347935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert private final CharsTrie charsTrie; 3357935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert 3367935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert private CharsTrieMap(CharsTrie charsTrie, V[] intToValue, int size) { 3377935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert super(intToValue, size); 3387935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert this.charsTrie = charsTrie; 3397935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert } 3407935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert 3417935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert public V get(CharSequence test) { 3427935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert Result result = charsTrie.reset().next(test, 0, test.length()); 3437935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert return result.hasValue() ? intToValue[charsTrie.getValue()] : null; 3447935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert } 3457935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert 3467935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert public String toString() { 3477935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert return toString(charsTrie, " : ", "\n"); 3487935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert } 3497935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert 3507935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert /** 3517935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert * Warning: the entry contents are only valid until the next next() call!! 3527935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert */ 3537935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert public Iterator<Entry<CharSequence, V>> iterator() { 3547935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert // TODO Auto-generated method stub 3557935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert return new CharsIterator(); 3567935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert } 3577935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert 3587935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert public TrieMap.Matcher<V> getMatcher() { 3597935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert return new CharsMatcher(); 3607935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert } 3617935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert 3627935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert private class CharsIterator implements Iterator<Entry<CharSequence, V>> { 3637935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert CharsTrie.Iterator iterator = charsTrie.iterator(); 3647935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert CharsEntry entry = new CharsEntry(); 3657935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert 3667935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert public boolean hasNext() { 3677935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert return iterator.hasNext(); 3687935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert } 3697935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert 3707935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert public Entry<CharSequence, V> next() { 3717935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert entry.charsEntry = iterator.next(); 3727935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert return entry; 3737935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert } 3747935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert 3757935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert public void remove() { 3767935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert throw new UnsupportedOperationException(); 3777935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert } 3787935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert 3797935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert } 3807935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert 3817935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert private class CharsEntry implements Entry<CharSequence, V> { 3827935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert public CharsTrie.Entry charsEntry; 3837935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert 3847935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert public CharSequence getKey() { 3857935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert return charsEntry.chars; 3867935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert } 3877935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert 3887935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert public V getValue() { 3897935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert return intToValue[charsEntry.value]; 3907935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert } 3917935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert 3927935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert public V setValue(V value) { 3937935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert throw new UnsupportedOperationException(); 3947935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert } 3957935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert } 3967935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert 3977935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert public class CharsMatcher extends Matcher<V> { 3987935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert private V value = null; 3997935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert 4007935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert public void set(CharSequence text, int start) { 4017935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert this.text = text; 4027935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert this.start = start; 4037935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert this.current = start; 4047935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert } 4057935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert 4067935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert public int getStart() { 4077935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert return start; 4087935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert } 4097935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert 4107935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert public int getEnd() { 4117935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert return current; 4127935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert } 4137935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert 4147935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert /** 4157935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert * Finds the next match. Returns false when there are no possible further matches from the current start 4167935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert * point. Once that happens, call nextStart(); Call getValue to get the current value. 4177935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert * 4187935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert * @return false when done. There may be a value, however. 4197935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert */ 4207935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert public boolean next() { 4217935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert while (current < text.length()) { 4227935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert char c = text.charAt(current++); 4237935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert Result result = charsTrie.next(c); 4247935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert if (result.hasValue()) { 4257935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert value = intToValue[charsTrie.getValue()]; 4267935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert return result.hasNext(); 4277935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert } else if (!result.matches()) { 4287935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert value = null; 4297935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert return false; 4307935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert 4317935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert } 4327935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert } 4337935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert value = null; 4347935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert return false; 4357935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert } 4367935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert 4377935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert public boolean nextStart() { 4387935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert if (start >= text.length()) { 4397935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert return false; 4407935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert } 4417935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert ++start; 4427935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert current = start; 4437935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert charsTrie.reset(); 4447935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert return true; 4457935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert } 4467935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert 4477935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert public V getValue() { 4487935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert return value; 4497935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert } 4507935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert } 4517935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert } 4527935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert 4537935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert public static class CharsBuilder<V> extends Builder<V> { 4547935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert CharsTrieBuilder builder = new CharsTrieBuilder(); 4557935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert 4567935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert public CharsBuilder<V> add(CharSequence key, V value) { 4577935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert // traverse the values, and get a mapping of a byte string to list of 4587935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert // integers, and a mapping from those integers to a set of values 4597935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert Integer index; 4607935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert if (option == Option.SMALL) { 4617935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert index = valueToIntegerTemp.get(value); 4627935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert if (index == null) { 4637935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert index = intToValueTemp.size(); 4647935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert intToValueTemp.add(value); 4657935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert valueToIntegerTemp.put(value, index); 4667935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert } 4677935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert } else { 4687935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert index = intToValueTemp.size(); 4697935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert intToValueTemp.add(value); 4707935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert } 4717935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert try { 4727935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert builder.add(key.toString(), index); 4737935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert return this; 4747935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert } catch (Exception e) { 4757935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert throw new IllegalArgumentException("Failed to add " + value + ", " + key + "=" + Utility.hex(key), e); 4767935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert } 4777935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert } 4787935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert 4797935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert public <K extends CharSequence> CharsBuilder<V> addAll(Map<K, V> keyValuePairs) { 4807935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert for (Entry<K, V> entry : keyValuePairs.entrySet()) { 4817935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert add(entry.getKey(), entry.getValue()); 4827935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert } 4837935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert return this; 4847935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert } 4857935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert 4867935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert public TrieMap<V> build() { 4877935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert CharSequence buildCharSequence = builder.buildCharSequence(option); 4887935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert int size = 2 * buildCharSequence.length(); 4897935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert // CharsTrie charsTrie = builder.build(option); 4907935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert CharsTrie charsTrie = new CharsTrie(buildCharSequence, 0); 4917935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert @SuppressWarnings("unchecked") 4927935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert V[] intToValueArray = intToValueTemp.toArray((V[]) (new Object[intToValueTemp.size()])); 4937935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert return new CharsTrieMap<V>(charsTrie, intToValueArray, size); 4947935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert } 4957935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert } 4967935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert 4977935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert /** 4987935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert * Supports the following format for encoding chars (Unicode 16-bit code units). The format is slightly simpler and 4997935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert * more compact than UTF8, but also maintains ordering. It is not, however self-synchronizing, and is not intended 5007935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert * for general usage 5017935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert * 5027935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert * <pre> 5037935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert * 0000..007F - 0xxx xxxx 5047935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert * 0000..7EFF - 1yyy yyyy xxxx xxxx 5057935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert * 7F00..FFFF - 1111 1111 yyyy yyyy xxxx xxxx 5067935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert * </pre> 5077935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert */ 5087935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert static class ByteConverter { 5097935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert public static int getBytes(char source, byte[] bytes, int limit) { 5107935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert if (source < 0x80) { 5117935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert bytes[limit++] = (byte) source; 5127935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert } else if (source < 0x7F00) { 5137935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert bytes[limit++] = (byte) (0x80 | (source >> 8)); 5147935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert bytes[limit++] = (byte) source; 5157935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert } else { 5167935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert bytes[limit++] = (byte) -1; 5177935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert bytes[limit++] = (byte) (source >> 8); 5187935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert bytes[limit++] = (byte) source; 5197935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert } 5207935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert return limit; 5217935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert } 5227935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert 5237935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert /** 5247935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert * Transform the string into a sequence of bytes, appending them after start, and return the new limit. 5257935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert */ 5267935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert public static int getBytes(CharSequence source, byte[] bytes, int limit) { 5277935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert for (int i = 0; i < source.length(); ++i) { 5287935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert limit = getBytes(source.charAt(i), bytes, limit); 5297935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert } 5307935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert return limit; 5317935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert } 5327935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert 5337935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert /** 5347935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert * Transform a sequence of bytes into a string, according to the format in getBytes. No error checking. 5357935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert */ 5367935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert public static String getChars(byte[] bytes, int start, int limit) { 5377935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert StringBuilder buffer = new StringBuilder(); 5387935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert char[] output = new char[1]; 5397935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert for (int i = start; i < limit;) { 5407935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert i = getChar(bytes, i, output); 5417935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert buffer.append(output[0]); 5427935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert } 5437935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert return buffer.toString(); 5447935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert } 5457935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert 5467935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert public static int getChar(byte[] bytes, int start, char[] output) { 5477935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert byte b = bytes[start++]; 5487935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert if (b >= 0) { 5497935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert output[0] = (char) b; 5507935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert } else if (b != (byte) -1) { // 2 bytes 5517935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert int b1 = 0x7F & b; 5527935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert int b2 = 0xFF & bytes[start++]; 5537935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert output[0] = (char) ((b1 << 8) | b2); 5547935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert } else { 5557935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert int b2 = bytes[start++]; 5567935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert int b3 = 0xFF & bytes[start++]; 5577935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert output[0] = (char) ((b2 << 8) | b3); 5587935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert } 5597935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert return start; 5607935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert } 5617935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert 5627935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert private static void getChars(BytesTrie.Entry entry, StringBuilder stringBuilder) { 5637935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert int len = entry.bytesLength(); 5647935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert for (int i = 0; i < len;) { 5657935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert byte b = entry.byteAt(i++); 5667935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert if (b >= 0) { 5677935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert stringBuilder.append((char) b); 5687935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert } else if (b != (byte) -1) { // 2 bytes 5697935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert int b1 = 0x7F & b; 5707935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert int b2 = 0xFF & entry.byteAt(i++); 5717935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert stringBuilder.append((char) ((b1 << 8) | b2)); 5727935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert } else { 5737935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert int b2 = entry.byteAt(i++); 5747935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert int b3 = 0xFF & entry.byteAt(i++); 5757935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert stringBuilder.append((char) ((b2 << 8) | b3)); 5767935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert } 5777935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert } 5787935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert } 5797935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert } 5807935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert 5817935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert public static String toString(BytesTrie bytesTrie2) { 5827935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert return toString(bytesTrie2, " : ", "\n"); 5837935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert } 5847935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert 5857935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert public static String toString(BytesTrie bytesTrie2, String keyValueSeparator, String itemSeparator) { 5867935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert StringBuilder buffer = new StringBuilder(); 5877935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert BytesTrie.Iterator iterator = bytesTrie2.iterator(); 5887935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert while (iterator.hasNext()) { 5897935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert BytesTrie.Entry bytesEntry = iterator.next(); 5907935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert int len = bytesEntry.bytesLength(); 5917935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert byte[] bytes = new byte[len]; 5927935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert bytesEntry.copyBytesTo(bytes, 0); 5937935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert buffer.append(Utility.hex(bytes, 0, len, " ")) 5947935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert .append(keyValueSeparator) 5957935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert .append(bytesEntry.value) 5967935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert .append(itemSeparator); 5977935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert } 5987935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert return buffer.toString(); 5997935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert } 6007935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert 6017935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert public static String toString(CharsTrie bytesTrie2, String keyValueSeparator, String itemSeparator) { 6027935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert StringBuilder buffer = new StringBuilder(); 6037935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert CharsTrie.Iterator iterator = bytesTrie2.iterator(); 6047935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert while (iterator.hasNext()) { 6057935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert CharsTrie.Entry bytesEntry = iterator.next(); 6067935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert buffer.append(Utility.hex(bytesEntry.chars)) 6077935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert .append(keyValueSeparator) 6087935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert .append(bytesEntry.value) 6097935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert .append(itemSeparator); 6107935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert } 6117935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert return buffer.toString(); 6127935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert } 6137935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert 6147935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert} 615