12d2bb24f747c65578da13d5b13b82f0669690461Fredrik Roubert// © 2016 and later: Unicode, Inc. and others.
22d2bb24f747c65578da13d5b13b82f0669690461Fredrik Roubert// License & terms of use: http://www.unicode.org/copyright.html#License
37935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert/*
47935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert *******************************************************************************
57935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert * Copyright (C) 2011, Google, International Business Machines Corporation and
67935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert * others. All Rights Reserved.
77935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert *******************************************************************************
87935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert */
97935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubertpackage com.ibm.icu.dev.test.util;
107935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert
117935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubertimport java.util.ArrayList;
127935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubertimport java.util.HashMap;
137935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubertimport java.util.Iterator;
147935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubertimport java.util.List;
157935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubertimport java.util.Map;
167935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubertimport java.util.Map.Entry;
177935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert
187935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubertimport com.ibm.icu.impl.Utility;
197935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubertimport com.ibm.icu.util.BytesTrie;
207935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubertimport com.ibm.icu.util.BytesTrie.Result;
217935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubertimport com.ibm.icu.util.BytesTrieBuilder;
227935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubertimport com.ibm.icu.util.CharsTrie;
237935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubertimport com.ibm.icu.util.CharsTrieBuilder;
247935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubertimport com.ibm.icu.util.StringTrieBuilder.Option;
257935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert
267935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert// would be nice to have a BytesTrieBuilder.add(aByte);
277935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert// question: can bytetrie store <"",x>?
287935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert// can you store the same string twice, eg add(bytes1, value), add(bytes1, value)? What happens? If an error,
297935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert// should happen on add, not on build.
307935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert// the BytesTrieBuilder.build should create a BytesTrie, not a raw array. For the latter, use buildArray or something.
317935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert// need class description; examples of usage; which method can/should be called after which others.
327935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert
337935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubertpublic abstract class TrieMap<V> implements Iterable<Entry<CharSequence, V>> {
347935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert
357935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert    public enum Style {
367935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        BYTES, CHARS
377935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert    }
387935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert
397935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert    private static final boolean DEBUG = true;
407935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert
417935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert    protected final V[] intToValue;
427935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert    protected final int size;
437935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert
447935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert    protected TrieMap(V[] intToValue, int size) {
457935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        this.intToValue = intToValue;
467935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        this.size = size;
477935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert    }
487935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert
497935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert    public int keyByteSize() {
507935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        return size;
517935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert    }
527935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert
537935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert    public static abstract class Builder<V> {
547935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        Option option;
557935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        protected List<V> intToValueTemp = new ArrayList<V>();
567935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        protected Map<V, Integer> valueToIntegerTemp = new HashMap<V, Integer>();
577935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert
587935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        static public <K extends CharSequence, V> Builder<V> with(Style style, Map<K, V> keyValuePairs) {
597935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert            return with(style, Option.SMALL, keyValuePairs);
607935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        }
617935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert
627935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        static public <K extends CharSequence, V> Builder<V> with(Style style, Option option, Map<K, V> keyValuePairs) {
637935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert            Builder<V> result = style == Style.BYTES ? new BytesTrieMap.BytesBuilder<V>()
647935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                    : new CharsTrieMap.CharsBuilder<V>();
657935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert            result.option = option;
667935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert            return result.addAll(keyValuePairs);
677935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        }
687935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert
697935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        static public <K extends CharSequence, V> Builder<V> with(Style style, K key, V value) {
707935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert            return with(style, Option.SMALL, key, value);
717935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        }
727935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert
737935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        static public <K extends CharSequence, V> Builder<V> with(Style style, Option option, K key, V value) {
747935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert            Builder<V> result = style == Style.BYTES ? new BytesTrieMap.BytesBuilder<V>()
757935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                    : new CharsTrieMap.CharsBuilder<V>();
767935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert            result.option = option;
777935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert            return result.add(key, value);
787935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        }
797935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert
807935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        public abstract Builder<V> add(CharSequence key, V value);
817935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert
827935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        public abstract <K extends CharSequence> Builder<V> addAll(Map<K, V> keyValuePairs);
837935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert
847935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        public abstract TrieMap<V> build();
857935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert    }
867935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert
877935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert    abstract public V get(CharSequence test);
887935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert
897935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert    /**
907935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert     * Warning: the entry contents are only valid until the next next() call!!
917935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert     */
927935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert    abstract public Iterator<Entry<CharSequence, V>> iterator();
937935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert
947935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert    abstract public Matcher<V> getMatcher();
957935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert
967935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert    public abstract static class Matcher<V> {
977935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        protected CharSequence text = "";
987935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        protected int start = 0;
997935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        protected int current = 0;
1007935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert
1017935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        abstract void set(CharSequence string, int i);
1027935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert
1037935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        abstract boolean next();
1047935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert
1057935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        abstract V getValue();
1067935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert
1077935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        abstract int getStart();
1087935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert
1097935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        abstract int getEnd();
1107935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert
1117935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        abstract boolean nextStart();
1127935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert    }
1137935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert
1147935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert    private static class BytesTrieMap<V> extends TrieMap<V> {
1157935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        private final BytesTrie bytesTrie;
1167935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        private byte[] bytes = new byte[3];
1177935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert
1187935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        private BytesTrieMap(BytesTrie bytesTrie, V[] intToValue, int size) {
1197935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert            super(intToValue, size);
1207935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert            this.bytesTrie = bytesTrie;
1217935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        }
1227935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert
1237935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        public V get(CharSequence test) {
1247935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert            int length = test.length();
1257935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert            bytesTrie.reset();
1267935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert            if (length == 0) {
1277935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                return bytesTrie.current().hasValue() ? intToValue[bytesTrie.getValue()] : null;
1287935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert            }
1297935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert            Result result = Result.NO_VALUE;
1307935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert            for (int i = 0; i < length; ++i) {
1317935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                if (!result.hasNext()) {
1327935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                    return null;
1337935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                }
1347935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                char c = test.charAt(i);
1357935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                int limit = ByteConverter.getBytes(c, bytes, 0);
1367935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                result = limit == 1 ? bytesTrie.next(bytes[0]) : bytesTrie.next(bytes, 0, limit);
1377935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert            }
1387935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert            return result.hasValue() ? intToValue[bytesTrie.getValue()] : null;
1397935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert
1407935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert            // int length = test.length();
1417935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert            // if (length == 0) {
1427935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert            // return null;
1437935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert            // }
1447935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert            // bytesTrie.reset();
1457935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert            // Result result = null;
1467935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert            // byte[] bytes = new byte[3];
1477935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert            // for (int i = 0; i < length; ++i) {
1487935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert            // char c = test.charAt(i);
1497935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert            // int limit = ByteConverter.getBytes(c, bytes, 0);
1507935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert            // for (int j = 0; j < limit; ++j) {
1517935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert            // result = bytesTrie.next(bytes[j]&0xFF);
1527935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert            // if (!result.matches()) {
1537935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert            // return null;
1547935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert            // }
1557935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert            // }
1567935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert            // }
1577935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert            // return result.hasValue() ? intToValue[bytesTrie.getValue()] : null;
1587935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        }
1597935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert
1607935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        public String toString() {
1617935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert            return toString(bytesTrie, " : ", "\n");
1627935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        }
1637935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert
1647935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        /**
1657935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert         * Warning: the entry contents are only valid until the next next() call!!
1667935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert         */
1677935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        public Iterator<Entry<CharSequence, V>> iterator() {
1687935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert            // TODO Auto-generated method stub
1697935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert            return new BytesIterator();
1707935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        }
1717935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert
1727935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        public TrieMap.Matcher<V> getMatcher() {
1737935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert            return new BytesMatcher();
1747935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        }
1757935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert
1767935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        private class BytesIterator implements Iterator<Entry<CharSequence, V>> {
1777935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert            BytesTrie.Iterator iterator = bytesTrie.iterator();
1787935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert            BytesEntry entry = new BytesEntry();
1797935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert
1807935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert            public boolean hasNext() {
1817935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                return iterator.hasNext();
1827935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert            }
1837935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert
1847935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert            public Entry<CharSequence, V> next() {
1857935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                entry.bytesEntry = iterator.next();
1867935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                return entry;
1877935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert            }
1887935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert
1897935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert            public void remove() {
1907935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                throw new UnsupportedOperationException();
1917935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert            }
1927935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        }
1937935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert
1947935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        private class BytesEntry implements Entry<CharSequence, V> {
1957935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert            public BytesTrie.Entry bytesEntry;
1967935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert            StringBuilder buffer = new StringBuilder();
1977935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert
1987935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert            public CharSequence getKey() {
1997935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                buffer.setLength(0);
2007935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                ByteConverter.getChars(bytesEntry, buffer);
2017935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                return buffer;
2027935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert            }
2037935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert
2047935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert            public V getValue() {
2057935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                return intToValue[bytesEntry.value];
2067935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert            }
2077935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert
2087935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert            public V setValue(V value) {
2097935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                throw new UnsupportedOperationException();
2107935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert            }
2117935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        }
2127935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert
2137935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        public class BytesMatcher extends Matcher<V> {
2147935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert            private byte[] bytes = new byte[3];
2157935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert            private V value = null;
2167935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert
2177935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert            public void set(CharSequence text, int start) {
2187935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                this.text = text;
2197935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                this.start = start;
2207935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                this.current = start;
2217935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert            }
2227935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert
2237935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert            public int getStart() {
2247935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                return start;
2257935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert            }
2267935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert
2277935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert            public int getEnd() {
2287935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                return current;
2297935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert            }
2307935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert
2317935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert            /**
2327935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert             * Finds the next match. Returns false when there are no possible further matches from the current start
2337935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert             * point. Once that happens, call nextStart(); Call getValue to get the current value.
2347935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert             *
2357935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert             * @return false when done. There may be a value, however.
2367935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert             */
2377935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert            public boolean next() {
2387935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                while (current < text.length()) {
2397935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                    char c = text.charAt(current++);
2407935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                    int limit = ByteConverter.getBytes(c, bytes, 0);
2417935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                    for (int j = 0; j < limit; ++j) {
2427935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                        Result result = bytesTrie.next(bytes[j]);
2437935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                        if (result.hasValue()) {
2447935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                            if (j < limit - 1) {
2457935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                                throw new IllegalArgumentException("Data corrupt");
2467935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                            }
2477935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                            value = intToValue[bytesTrie.getValue()];
2487935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                            return result.hasNext();
2497935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                        } else if (!result.matches()) {
2507935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                            value = null;
2517935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                            return false;
2527935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                        }
2537935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                    }
2547935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                }
2557935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                value = null;
2567935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                return false;
2577935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert            }
2587935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert
2597935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert            public boolean nextStart() {
2607935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                if (start >= text.length()) {
2617935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                    return false;
2627935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                }
2637935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                ++start;
2647935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                current = start;
2657935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                bytesTrie.reset();
2667935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                return true;
2677935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert            }
2687935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert
2697935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert            public V getValue() {
2707935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                return value;
2717935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert            }
2727935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        }
2737935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert    }
2747935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert
2757935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert    public static class BytesBuilder<V> extends Builder<V> {
2767935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        BytesTrieBuilder builder = new BytesTrieBuilder();
2777935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        byte[] bytes = new byte[200];
2787935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        List<String> debugBytes = DEBUG ? new ArrayList<String>() : null;
2797935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert
2807935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        public BytesBuilder<V> add(CharSequence key, V value) {
2817935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert            // traverse the values, and get a mapping of a byte string to list of
2827935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert            // integers, and a mapping from those integers to a set of values
2837935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert            Integer index;
2847935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert            if (option == Option.SMALL) {
2857935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                index = valueToIntegerTemp.get(value);
2867935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                if (index == null) {
2877935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                    index = intToValueTemp.size();
2887935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                    intToValueTemp.add(value);
2897935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                    valueToIntegerTemp.put(value, index);
2907935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                }
2917935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert            } else {
2927935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                index = intToValueTemp.size();
2937935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                intToValueTemp.add(value);
2947935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert            }
2957935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert            // dumb implementation for now
2967935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert            // the buffer size is at most 3 * number_of_chars
2977935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert            if (bytes.length < key.length() * 3) {
2987935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                bytes = new byte[64 + key.length() * 3];
2997935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert            }
3007935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert            int limit = 0;
3017935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert            for (int i = 0; i < key.length(); ++i) {
3027935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                char c = key.charAt(i);
3037935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                limit = ByteConverter.getBytes(c, bytes, limit);
3047935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert            }
3057935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert            try {
3067935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                builder.add(bytes, limit, index);
3077935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                return this;
3087935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert            } catch (Exception e) {
3097935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                ArrayList<String> list = new ArrayList<String>();
3107935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                for (int i = 0; i < limit; ++i) {
3117935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                    list.add(Utility.hex(bytes[i]));
3127935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                }
3137935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                throw new IllegalArgumentException("Failed to add " + value + ", " + key + "=" + list, e);
3147935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert            }
3157935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        }
3167935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert
3177935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        public <K extends CharSequence> BytesBuilder<V> addAll(Map<K, V> keyValuePairs) {
3187935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert            for (Entry<K, V> entry : keyValuePairs.entrySet()) {
3197935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                add(entry.getKey(), entry.getValue());
3207935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert            }
3217935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert            return this;
3227935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        }
3237935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert
3247935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        public TrieMap<V> build() {
3257935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert            int size = builder.buildByteBuffer(option).remaining();
3267935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert            BytesTrie bytesTrie = builder.build(option);
3277935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert            @SuppressWarnings("unchecked")
3287935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert            V[] intToValueArray = intToValueTemp.toArray((V[]) (new Object[intToValueTemp.size()]));
3297935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert            return new BytesTrieMap<V>(bytesTrie, intToValueArray, size);
3307935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        }
3317935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert    }
3327935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert
3337935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert    private static class CharsTrieMap<V> extends TrieMap<V> {
3347935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        private final CharsTrie charsTrie;
3357935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert
3367935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        private CharsTrieMap(CharsTrie charsTrie, V[] intToValue, int size) {
3377935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert            super(intToValue, size);
3387935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert            this.charsTrie = charsTrie;
3397935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        }
3407935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert
3417935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        public V get(CharSequence test) {
3427935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert            Result result = charsTrie.reset().next(test, 0, test.length());
3437935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert            return result.hasValue() ? intToValue[charsTrie.getValue()] : null;
3447935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        }
3457935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert
3467935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        public String toString() {
3477935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert            return toString(charsTrie, " : ", "\n");
3487935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        }
3497935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert
3507935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        /**
3517935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert         * Warning: the entry contents are only valid until the next next() call!!
3527935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert         */
3537935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        public Iterator<Entry<CharSequence, V>> iterator() {
3547935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert            // TODO Auto-generated method stub
3557935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert            return new CharsIterator();
3567935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        }
3577935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert
3587935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        public TrieMap.Matcher<V> getMatcher() {
3597935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert            return new CharsMatcher();
3607935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        }
3617935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert
3627935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        private class CharsIterator implements Iterator<Entry<CharSequence, V>> {
3637935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert            CharsTrie.Iterator iterator = charsTrie.iterator();
3647935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert            CharsEntry entry = new CharsEntry();
3657935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert
3667935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert            public boolean hasNext() {
3677935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                return iterator.hasNext();
3687935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert            }
3697935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert
3707935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert            public Entry<CharSequence, V> next() {
3717935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                entry.charsEntry = iterator.next();
3727935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                return entry;
3737935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert            }
3747935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert
3757935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert            public void remove() {
3767935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                throw new UnsupportedOperationException();
3777935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert            }
3787935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert
3797935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        }
3807935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert
3817935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        private class CharsEntry implements Entry<CharSequence, V> {
3827935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert            public CharsTrie.Entry charsEntry;
3837935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert
3847935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert            public CharSequence getKey() {
3857935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                return charsEntry.chars;
3867935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert            }
3877935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert
3887935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert            public V getValue() {
3897935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                return intToValue[charsEntry.value];
3907935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert            }
3917935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert
3927935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert            public V setValue(V value) {
3937935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                throw new UnsupportedOperationException();
3947935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert            }
3957935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        }
3967935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert
3977935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        public class CharsMatcher extends Matcher<V> {
3987935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert            private V value = null;
3997935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert
4007935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert            public void set(CharSequence text, int start) {
4017935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                this.text = text;
4027935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                this.start = start;
4037935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                this.current = start;
4047935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert            }
4057935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert
4067935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert            public int getStart() {
4077935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                return start;
4087935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert            }
4097935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert
4107935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert            public int getEnd() {
4117935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                return current;
4127935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert            }
4137935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert
4147935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert            /**
4157935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert             * Finds the next match. Returns false when there are no possible further matches from the current start
4167935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert             * point. Once that happens, call nextStart(); Call getValue to get the current value.
4177935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert             *
4187935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert             * @return false when done. There may be a value, however.
4197935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert             */
4207935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert            public boolean next() {
4217935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                while (current < text.length()) {
4227935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                    char c = text.charAt(current++);
4237935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                    Result result = charsTrie.next(c);
4247935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                    if (result.hasValue()) {
4257935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                        value = intToValue[charsTrie.getValue()];
4267935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                        return result.hasNext();
4277935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                    } else if (!result.matches()) {
4287935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                        value = null;
4297935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                        return false;
4307935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert
4317935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                    }
4327935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                }
4337935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                value = null;
4347935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                return false;
4357935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert            }
4367935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert
4377935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert            public boolean nextStart() {
4387935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                if (start >= text.length()) {
4397935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                    return false;
4407935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                }
4417935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                ++start;
4427935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                current = start;
4437935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                charsTrie.reset();
4447935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                return true;
4457935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert            }
4467935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert
4477935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert            public V getValue() {
4487935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                return value;
4497935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert            }
4507935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        }
4517935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert    }
4527935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert
4537935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert    public static class CharsBuilder<V> extends Builder<V> {
4547935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        CharsTrieBuilder builder = new CharsTrieBuilder();
4557935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert
4567935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        public CharsBuilder<V> add(CharSequence key, V value) {
4577935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert            // traverse the values, and get a mapping of a byte string to list of
4587935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert            // integers, and a mapping from those integers to a set of values
4597935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert            Integer index;
4607935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert            if (option == Option.SMALL) {
4617935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                index = valueToIntegerTemp.get(value);
4627935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                if (index == null) {
4637935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                    index = intToValueTemp.size();
4647935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                    intToValueTemp.add(value);
4657935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                    valueToIntegerTemp.put(value, index);
4667935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                }
4677935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert            } else {
4687935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                index = intToValueTemp.size();
4697935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                intToValueTemp.add(value);
4707935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert            }
4717935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert            try {
4727935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                builder.add(key.toString(), index);
4737935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                return this;
4747935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert            } catch (Exception e) {
4757935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                throw new IllegalArgumentException("Failed to add " + value + ", " + key + "=" + Utility.hex(key), e);
4767935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert            }
4777935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        }
4787935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert
4797935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        public <K extends CharSequence> CharsBuilder<V> addAll(Map<K, V> keyValuePairs) {
4807935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert            for (Entry<K, V> entry : keyValuePairs.entrySet()) {
4817935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                add(entry.getKey(), entry.getValue());
4827935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert            }
4837935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert            return this;
4847935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        }
4857935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert
4867935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        public TrieMap<V> build() {
4877935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert            CharSequence buildCharSequence = builder.buildCharSequence(option);
4887935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert            int size = 2 * buildCharSequence.length();
4897935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert            // CharsTrie charsTrie = builder.build(option);
4907935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert            CharsTrie charsTrie = new CharsTrie(buildCharSequence, 0);
4917935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert            @SuppressWarnings("unchecked")
4927935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert            V[] intToValueArray = intToValueTemp.toArray((V[]) (new Object[intToValueTemp.size()]));
4937935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert            return new CharsTrieMap<V>(charsTrie, intToValueArray, size);
4947935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        }
4957935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert    }
4967935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert
4977935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert    /**
4987935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert     * Supports the following format for encoding chars (Unicode 16-bit code units). The format is slightly simpler and
4997935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert     * more compact than UTF8, but also maintains ordering. It is not, however self-synchronizing, and is not intended
5007935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert     * for general usage
5017935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert     *
5027935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert     * <pre>
5037935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert     * 0000..007F - 0xxx xxxx
5047935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert     * 0000..7EFF - 1yyy yyyy xxxx xxxx
5057935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert     * 7F00..FFFF - 1111 1111 yyyy yyyy xxxx xxxx
5067935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert     * </pre>
5077935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert     */
5087935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert    static class ByteConverter {
5097935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        public static int getBytes(char source, byte[] bytes, int limit) {
5107935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert            if (source < 0x80) {
5117935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                bytes[limit++] = (byte) source;
5127935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert            } else if (source < 0x7F00) {
5137935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                bytes[limit++] = (byte) (0x80 | (source >> 8));
5147935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                bytes[limit++] = (byte) source;
5157935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert            } else {
5167935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                bytes[limit++] = (byte) -1;
5177935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                bytes[limit++] = (byte) (source >> 8);
5187935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                bytes[limit++] = (byte) source;
5197935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert            }
5207935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert            return limit;
5217935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        }
5227935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert
5237935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        /**
5247935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert         * Transform the string into a sequence of bytes, appending them after start, and return the new limit.
5257935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert         */
5267935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        public static int getBytes(CharSequence source, byte[] bytes, int limit) {
5277935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert            for (int i = 0; i < source.length(); ++i) {
5287935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                limit = getBytes(source.charAt(i), bytes, limit);
5297935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert            }
5307935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert            return limit;
5317935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        }
5327935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert
5337935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        /**
5347935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert         * Transform a sequence of bytes into a string, according to the format in getBytes. No error checking.
5357935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert         */
5367935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        public static String getChars(byte[] bytes, int start, int limit) {
5377935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert            StringBuilder buffer = new StringBuilder();
5387935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert            char[] output = new char[1];
5397935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert            for (int i = start; i < limit;) {
5407935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                i = getChar(bytes, i, output);
5417935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                buffer.append(output[0]);
5427935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert            }
5437935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert            return buffer.toString();
5447935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        }
5457935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert
5467935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        public static int getChar(byte[] bytes, int start, char[] output) {
5477935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert            byte b = bytes[start++];
5487935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert            if (b >= 0) {
5497935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                output[0] = (char) b;
5507935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert            } else if (b != (byte) -1) { // 2 bytes
5517935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                int b1 = 0x7F & b;
5527935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                int b2 = 0xFF & bytes[start++];
5537935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                output[0] = (char) ((b1 << 8) | b2);
5547935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert            } else {
5557935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                int b2 = bytes[start++];
5567935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                int b3 = 0xFF & bytes[start++];
5577935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                output[0] = (char) ((b2 << 8) | b3);
5587935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert            }
5597935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert            return start;
5607935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        }
5617935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert
5627935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        private static void getChars(BytesTrie.Entry entry, StringBuilder stringBuilder) {
5637935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert            int len = entry.bytesLength();
5647935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert            for (int i = 0; i < len;) {
5657935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                byte b = entry.byteAt(i++);
5667935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                if (b >= 0) {
5677935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                    stringBuilder.append((char) b);
5687935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                } else if (b != (byte) -1) { // 2 bytes
5697935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                    int b1 = 0x7F & b;
5707935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                    int b2 = 0xFF & entry.byteAt(i++);
5717935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                    stringBuilder.append((char) ((b1 << 8) | b2));
5727935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                } else {
5737935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                    int b2 = entry.byteAt(i++);
5747935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                    int b3 = 0xFF & entry.byteAt(i++);
5757935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                    stringBuilder.append((char) ((b2 << 8) | b3));
5767935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                }
5777935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert            }
5787935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        }
5797935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert    }
5807935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert
5817935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert    public static String toString(BytesTrie bytesTrie2) {
5827935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        return toString(bytesTrie2, " : ", "\n");
5837935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert    }
5847935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert
5857935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert    public static String toString(BytesTrie bytesTrie2, String keyValueSeparator, String itemSeparator) {
5867935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        StringBuilder buffer = new StringBuilder();
5877935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        BytesTrie.Iterator iterator = bytesTrie2.iterator();
5887935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        while (iterator.hasNext()) {
5897935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert            BytesTrie.Entry bytesEntry = iterator.next();
5907935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert            int len = bytesEntry.bytesLength();
5917935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert            byte[] bytes = new byte[len];
5927935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert            bytesEntry.copyBytesTo(bytes, 0);
5937935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert            buffer.append(Utility.hex(bytes, 0, len, " "))
5947935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                    .append(keyValueSeparator)
5957935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                    .append(bytesEntry.value)
5967935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                    .append(itemSeparator);
5977935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        }
5987935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        return buffer.toString();
5997935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert    }
6007935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert
6017935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert    public static String toString(CharsTrie bytesTrie2, String keyValueSeparator, String itemSeparator) {
6027935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        StringBuilder buffer = new StringBuilder();
6037935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        CharsTrie.Iterator iterator = bytesTrie2.iterator();
6047935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        while (iterator.hasNext()) {
6057935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert            CharsTrie.Entry bytesEntry = iterator.next();
6067935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert            buffer.append(Utility.hex(bytesEntry.chars))
6077935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                    .append(keyValueSeparator)
6087935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                    .append(bytesEntry.value)
6097935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert                    .append(itemSeparator);
6107935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        }
6117935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert        return buffer.toString();
6127935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert    }
6137935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert
6147935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert}
615