1/*
2 *******************************************************************************
3 * Copyright (C) 2011, Google, International Business Machines Corporation and
4 * others. All Rights Reserved.
5 *******************************************************************************
6 */
7package com.ibm.icu.dev.test.util;
8
9import java.util.ArrayList;
10import java.util.HashMap;
11import java.util.Iterator;
12import java.util.List;
13import java.util.Map;
14import java.util.Map.Entry;
15
16import com.ibm.icu.impl.Utility;
17import com.ibm.icu.util.BytesTrie;
18import com.ibm.icu.util.BytesTrie.Result;
19import com.ibm.icu.util.BytesTrieBuilder;
20import com.ibm.icu.util.CharsTrie;
21import com.ibm.icu.util.CharsTrieBuilder;
22import com.ibm.icu.util.StringTrieBuilder.Option;
23
24// would be nice to have a BytesTrieBuilder.add(aByte);
25// question: can bytetrie store <"",x>?
26// can you store the same string twice, eg add(bytes1, value), add(bytes1, value)? What happens? If an error,
27// should happen on add, not on build.
28// the BytesTrieBuilder.build should create a BytesTrie, not a raw array. For the latter, use buildArray or something.
29// need class description; examples of usage; which method can/should be called after which others.
30
31public abstract class TrieMap<V> implements Iterable<Entry<CharSequence, V>> {
32
33    public enum Style {
34        BYTES, CHARS
35    }
36
37    private static final boolean DEBUG = true;
38
39    protected final V[] intToValue;
40    protected final int size;
41
42    protected TrieMap(V[] intToValue, int size) {
43        this.intToValue = intToValue;
44        this.size = size;
45    }
46
47    public int keyByteSize() {
48        return size;
49    }
50
51    public static abstract class Builder<V> {
52        Option option;
53        protected List<V> intToValueTemp = new ArrayList<V>();
54        protected Map<V, Integer> valueToIntegerTemp = new HashMap<V, Integer>();
55
56        static public <K extends CharSequence, V> Builder<V> with(Style style, Map<K, V> keyValuePairs) {
57            return with(style, Option.SMALL, keyValuePairs);
58        }
59
60        static public <K extends CharSequence, V> Builder<V> with(Style style, Option option, Map<K, V> keyValuePairs) {
61            Builder<V> result = style == Style.BYTES ? new BytesTrieMap.BytesBuilder<V>()
62                    : new CharsTrieMap.CharsBuilder<V>();
63            result.option = option;
64            return result.addAll(keyValuePairs);
65        }
66
67        static public <K extends CharSequence, V> Builder<V> with(Style style, K key, V value) {
68            return with(style, Option.SMALL, key, value);
69        }
70
71        static public <K extends CharSequence, V> Builder<V> with(Style style, Option option, K key, V value) {
72            Builder<V> result = style == Style.BYTES ? new BytesTrieMap.BytesBuilder<V>()
73                    : new CharsTrieMap.CharsBuilder<V>();
74            result.option = option;
75            return result.add(key, value);
76        }
77
78        public abstract Builder<V> add(CharSequence key, V value);
79
80        public abstract <K extends CharSequence> Builder<V> addAll(Map<K, V> keyValuePairs);
81
82        public abstract TrieMap<V> build();
83    }
84
85    abstract public V get(CharSequence test);
86
87    /**
88     * Warning: the entry contents are only valid until the next next() call!!
89     */
90    abstract public Iterator<Entry<CharSequence, V>> iterator();
91
92    abstract public Matcher<V> getMatcher();
93
94    public abstract static class Matcher<V> {
95        protected CharSequence text = "";
96        protected int start = 0;
97        protected int current = 0;
98
99        abstract void set(CharSequence string, int i);
100
101        abstract boolean next();
102
103        abstract V getValue();
104
105        abstract int getStart();
106
107        abstract int getEnd();
108
109        abstract boolean nextStart();
110    }
111
112    private static class BytesTrieMap<V> extends TrieMap<V> {
113        private final BytesTrie bytesTrie;
114        private byte[] bytes = new byte[3];
115
116        private BytesTrieMap(BytesTrie bytesTrie, V[] intToValue, int size) {
117            super(intToValue, size);
118            this.bytesTrie = bytesTrie;
119        }
120
121        public V get(CharSequence test) {
122            int length = test.length();
123            bytesTrie.reset();
124            if (length == 0) {
125                return bytesTrie.current().hasValue() ? intToValue[bytesTrie.getValue()] : null;
126            }
127            Result result = Result.NO_VALUE;
128            for (int i = 0; i < length; ++i) {
129                if (!result.hasNext()) {
130                    return null;
131                }
132                char c = test.charAt(i);
133                int limit = ByteConverter.getBytes(c, bytes, 0);
134                result = limit == 1 ? bytesTrie.next(bytes[0]) : bytesTrie.next(bytes, 0, limit);
135            }
136            return result.hasValue() ? intToValue[bytesTrie.getValue()] : null;
137
138            // int length = test.length();
139            // if (length == 0) {
140            // return null;
141            // }
142            // bytesTrie.reset();
143            // Result result = null;
144            // byte[] bytes = new byte[3];
145            // for (int i = 0; i < length; ++i) {
146            // char c = test.charAt(i);
147            // int limit = ByteConverter.getBytes(c, bytes, 0);
148            // for (int j = 0; j < limit; ++j) {
149            // result = bytesTrie.next(bytes[j]&0xFF);
150            // if (!result.matches()) {
151            // return null;
152            // }
153            // }
154            // }
155            // return result.hasValue() ? intToValue[bytesTrie.getValue()] : null;
156        }
157
158        public String toString() {
159            return toString(bytesTrie, " : ", "\n");
160        }
161
162        /**
163         * Warning: the entry contents are only valid until the next next() call!!
164         */
165        public Iterator<Entry<CharSequence, V>> iterator() {
166            // TODO Auto-generated method stub
167            return new BytesIterator();
168        }
169
170        public TrieMap.Matcher<V> getMatcher() {
171            return new BytesMatcher();
172        }
173
174        private class BytesIterator implements Iterator<Entry<CharSequence, V>> {
175            BytesTrie.Iterator iterator = bytesTrie.iterator();
176            BytesEntry entry = new BytesEntry();
177
178            public boolean hasNext() {
179                return iterator.hasNext();
180            }
181
182            public Entry<CharSequence, V> next() {
183                entry.bytesEntry = iterator.next();
184                return entry;
185            }
186
187            public void remove() {
188                throw new UnsupportedOperationException();
189            }
190        }
191
192        private class BytesEntry implements Entry<CharSequence, V> {
193            public BytesTrie.Entry bytesEntry;
194            StringBuilder buffer = new StringBuilder();
195
196            public CharSequence getKey() {
197                buffer.setLength(0);
198                ByteConverter.getChars(bytesEntry, buffer);
199                return buffer;
200            }
201
202            public V getValue() {
203                return intToValue[bytesEntry.value];
204            }
205
206            public V setValue(V value) {
207                throw new UnsupportedOperationException();
208            }
209        }
210
211        public class BytesMatcher extends Matcher<V> {
212            private byte[] bytes = new byte[3];
213            private V value = null;
214
215            public void set(CharSequence text, int start) {
216                this.text = text;
217                this.start = start;
218                this.current = start;
219            }
220
221            public int getStart() {
222                return start;
223            }
224
225            public int getEnd() {
226                return current;
227            }
228
229            /**
230             * Finds the next match. Returns false when there are no possible further matches from the current start
231             * point. Once that happens, call nextStart(); Call getValue to get the current value.
232             *
233             * @return false when done. There may be a value, however.
234             */
235            public boolean next() {
236                while (current < text.length()) {
237                    char c = text.charAt(current++);
238                    int limit = ByteConverter.getBytes(c, bytes, 0);
239                    for (int j = 0; j < limit; ++j) {
240                        Result result = bytesTrie.next(bytes[j]);
241                        if (result.hasValue()) {
242                            if (j < limit - 1) {
243                                throw new IllegalArgumentException("Data corrupt");
244                            }
245                            value = intToValue[bytesTrie.getValue()];
246                            return result.hasNext();
247                        } else if (!result.matches()) {
248                            value = null;
249                            return false;
250                        }
251                    }
252                }
253                value = null;
254                return false;
255            }
256
257            public boolean nextStart() {
258                if (start >= text.length()) {
259                    return false;
260                }
261                ++start;
262                current = start;
263                bytesTrie.reset();
264                return true;
265            }
266
267            public V getValue() {
268                return value;
269            }
270        }
271    }
272
273    public static class BytesBuilder<V> extends Builder<V> {
274        BytesTrieBuilder builder = new BytesTrieBuilder();
275        byte[] bytes = new byte[200];
276        List<String> debugBytes = DEBUG ? new ArrayList<String>() : null;
277
278        public BytesBuilder<V> add(CharSequence key, V value) {
279            // traverse the values, and get a mapping of a byte string to list of
280            // integers, and a mapping from those integers to a set of values
281            Integer index;
282            if (option == Option.SMALL) {
283                index = valueToIntegerTemp.get(value);
284                if (index == null) {
285                    index = intToValueTemp.size();
286                    intToValueTemp.add(value);
287                    valueToIntegerTemp.put(value, index);
288                }
289            } else {
290                index = intToValueTemp.size();
291                intToValueTemp.add(value);
292            }
293            // dumb implementation for now
294            // the buffer size is at most 3 * number_of_chars
295            if (bytes.length < key.length() * 3) {
296                bytes = new byte[64 + key.length() * 3];
297            }
298            int limit = 0;
299            for (int i = 0; i < key.length(); ++i) {
300                char c = key.charAt(i);
301                limit = ByteConverter.getBytes(c, bytes, limit);
302            }
303            try {
304                builder.add(bytes, limit, index);
305                return this;
306            } catch (Exception e) {
307                ArrayList<String> list = new ArrayList<String>();
308                for (int i = 0; i < limit; ++i) {
309                    list.add(Utility.hex(bytes[i]));
310                }
311                throw new IllegalArgumentException("Failed to add " + value + ", " + key + "=" + list, e);
312            }
313        }
314
315        public <K extends CharSequence> BytesBuilder<V> addAll(Map<K, V> keyValuePairs) {
316            for (Entry<K, V> entry : keyValuePairs.entrySet()) {
317                add(entry.getKey(), entry.getValue());
318            }
319            return this;
320        }
321
322        public TrieMap<V> build() {
323            int size = builder.buildByteBuffer(option).remaining();
324            BytesTrie bytesTrie = builder.build(option);
325            @SuppressWarnings("unchecked")
326            V[] intToValueArray = intToValueTemp.toArray((V[]) (new Object[intToValueTemp.size()]));
327            return new BytesTrieMap<V>(bytesTrie, intToValueArray, size);
328        }
329    }
330
331    private static class CharsTrieMap<V> extends TrieMap<V> {
332        private final CharsTrie charsTrie;
333
334        private CharsTrieMap(CharsTrie charsTrie, V[] intToValue, int size) {
335            super(intToValue, size);
336            this.charsTrie = charsTrie;
337        }
338
339        public V get(CharSequence test) {
340            Result result = charsTrie.reset().next(test, 0, test.length());
341            return result.hasValue() ? intToValue[charsTrie.getValue()] : null;
342        }
343
344        public String toString() {
345            return toString(charsTrie, " : ", "\n");
346        }
347
348        /**
349         * Warning: the entry contents are only valid until the next next() call!!
350         */
351        public Iterator<Entry<CharSequence, V>> iterator() {
352            // TODO Auto-generated method stub
353            return new CharsIterator();
354        }
355
356        public TrieMap.Matcher<V> getMatcher() {
357            return new CharsMatcher();
358        }
359
360        private class CharsIterator implements Iterator<Entry<CharSequence, V>> {
361            CharsTrie.Iterator iterator = charsTrie.iterator();
362            CharsEntry entry = new CharsEntry();
363
364            public boolean hasNext() {
365                return iterator.hasNext();
366            }
367
368            public Entry<CharSequence, V> next() {
369                entry.charsEntry = iterator.next();
370                return entry;
371            }
372
373            public void remove() {
374                throw new UnsupportedOperationException();
375            }
376
377        }
378
379        private class CharsEntry implements Entry<CharSequence, V> {
380            public CharsTrie.Entry charsEntry;
381
382            public CharSequence getKey() {
383                return charsEntry.chars;
384            }
385
386            public V getValue() {
387                return intToValue[charsEntry.value];
388            }
389
390            public V setValue(V value) {
391                throw new UnsupportedOperationException();
392            }
393        }
394
395        public class CharsMatcher extends Matcher<V> {
396            private V value = null;
397
398            public void set(CharSequence text, int start) {
399                this.text = text;
400                this.start = start;
401                this.current = start;
402            }
403
404            public int getStart() {
405                return start;
406            }
407
408            public int getEnd() {
409                return current;
410            }
411
412            /**
413             * Finds the next match. Returns false when there are no possible further matches from the current start
414             * point. Once that happens, call nextStart(); Call getValue to get the current value.
415             *
416             * @return false when done. There may be a value, however.
417             */
418            public boolean next() {
419                while (current < text.length()) {
420                    char c = text.charAt(current++);
421                    Result result = charsTrie.next(c);
422                    if (result.hasValue()) {
423                        value = intToValue[charsTrie.getValue()];
424                        return result.hasNext();
425                    } else if (!result.matches()) {
426                        value = null;
427                        return false;
428
429                    }
430                }
431                value = null;
432                return false;
433            }
434
435            public boolean nextStart() {
436                if (start >= text.length()) {
437                    return false;
438                }
439                ++start;
440                current = start;
441                charsTrie.reset();
442                return true;
443            }
444
445            public V getValue() {
446                return value;
447            }
448        }
449    }
450
451    public static class CharsBuilder<V> extends Builder<V> {
452        CharsTrieBuilder builder = new CharsTrieBuilder();
453
454        public CharsBuilder<V> add(CharSequence key, V value) {
455            // traverse the values, and get a mapping of a byte string to list of
456            // integers, and a mapping from those integers to a set of values
457            Integer index;
458            if (option == Option.SMALL) {
459                index = valueToIntegerTemp.get(value);
460                if (index == null) {
461                    index = intToValueTemp.size();
462                    intToValueTemp.add(value);
463                    valueToIntegerTemp.put(value, index);
464                }
465            } else {
466                index = intToValueTemp.size();
467                intToValueTemp.add(value);
468            }
469            try {
470                builder.add(key.toString(), index);
471                return this;
472            } catch (Exception e) {
473                throw new IllegalArgumentException("Failed to add " + value + ", " + key + "=" + Utility.hex(key), e);
474            }
475        }
476
477        public <K extends CharSequence> CharsBuilder<V> addAll(Map<K, V> keyValuePairs) {
478            for (Entry<K, V> entry : keyValuePairs.entrySet()) {
479                add(entry.getKey(), entry.getValue());
480            }
481            return this;
482        }
483
484        public TrieMap<V> build() {
485            CharSequence buildCharSequence = builder.buildCharSequence(option);
486            int size = 2 * buildCharSequence.length();
487            // CharsTrie charsTrie = builder.build(option);
488            CharsTrie charsTrie = new CharsTrie(buildCharSequence, 0);
489            @SuppressWarnings("unchecked")
490            V[] intToValueArray = intToValueTemp.toArray((V[]) (new Object[intToValueTemp.size()]));
491            return new CharsTrieMap<V>(charsTrie, intToValueArray, size);
492        }
493    }
494
495    /**
496     * Supports the following format for encoding chars (Unicode 16-bit code units). The format is slightly simpler and
497     * more compact than UTF8, but also maintains ordering. It is not, however self-synchronizing, and is not intended
498     * for general usage
499     *
500     * <pre>
501     * 0000..007F - 0xxx xxxx
502     * 0000..7EFF - 1yyy yyyy xxxx xxxx
503     * 7F00..FFFF - 1111 1111 yyyy yyyy xxxx xxxx
504     * </pre>
505     */
506    static class ByteConverter {
507        public static int getBytes(char source, byte[] bytes, int limit) {
508            if (source < 0x80) {
509                bytes[limit++] = (byte) source;
510            } else if (source < 0x7F00) {
511                bytes[limit++] = (byte) (0x80 | (source >> 8));
512                bytes[limit++] = (byte) source;
513            } else {
514                bytes[limit++] = (byte) -1;
515                bytes[limit++] = (byte) (source >> 8);
516                bytes[limit++] = (byte) source;
517            }
518            return limit;
519        }
520
521        /**
522         * Transform the string into a sequence of bytes, appending them after start, and return the new limit.
523         */
524        public static int getBytes(CharSequence source, byte[] bytes, int limit) {
525            for (int i = 0; i < source.length(); ++i) {
526                limit = getBytes(source.charAt(i), bytes, limit);
527            }
528            return limit;
529        }
530
531        /**
532         * Transform a sequence of bytes into a string, according to the format in getBytes. No error checking.
533         */
534        public static String getChars(byte[] bytes, int start, int limit) {
535            StringBuilder buffer = new StringBuilder();
536            char[] output = new char[1];
537            for (int i = start; i < limit;) {
538                i = getChar(bytes, i, output);
539                buffer.append(output[0]);
540            }
541            return buffer.toString();
542        }
543
544        public static int getChar(byte[] bytes, int start, char[] output) {
545            byte b = bytes[start++];
546            if (b >= 0) {
547                output[0] = (char) b;
548            } else if (b != (byte) -1) { // 2 bytes
549                int b1 = 0x7F & b;
550                int b2 = 0xFF & bytes[start++];
551                output[0] = (char) ((b1 << 8) | b2);
552            } else {
553                int b2 = bytes[start++];
554                int b3 = 0xFF & bytes[start++];
555                output[0] = (char) ((b2 << 8) | b3);
556            }
557            return start;
558        }
559
560        private static void getChars(BytesTrie.Entry entry, StringBuilder stringBuilder) {
561            int len = entry.bytesLength();
562            for (int i = 0; i < len;) {
563                byte b = entry.byteAt(i++);
564                if (b >= 0) {
565                    stringBuilder.append((char) b);
566                } else if (b != (byte) -1) { // 2 bytes
567                    int b1 = 0x7F & b;
568                    int b2 = 0xFF & entry.byteAt(i++);
569                    stringBuilder.append((char) ((b1 << 8) | b2));
570                } else {
571                    int b2 = entry.byteAt(i++);
572                    int b3 = 0xFF & entry.byteAt(i++);
573                    stringBuilder.append((char) ((b2 << 8) | b3));
574                }
575            }
576        }
577    }
578
579    public static String toString(BytesTrie bytesTrie2) {
580        return toString(bytesTrie2, " : ", "\n");
581    }
582
583    public static String toString(BytesTrie bytesTrie2, String keyValueSeparator, String itemSeparator) {
584        StringBuilder buffer = new StringBuilder();
585        BytesTrie.Iterator iterator = bytesTrie2.iterator();
586        while (iterator.hasNext()) {
587            BytesTrie.Entry bytesEntry = iterator.next();
588            int len = bytesEntry.bytesLength();
589            byte[] bytes = new byte[len];
590            bytesEntry.copyBytesTo(bytes, 0);
591            buffer.append(Utility.hex(bytes, 0, len, " "))
592                    .append(keyValueSeparator)
593                    .append(bytesEntry.value)
594                    .append(itemSeparator);
595        }
596        return buffer.toString();
597    }
598
599    public static String toString(CharsTrie bytesTrie2, String keyValueSeparator, String itemSeparator) {
600        StringBuilder buffer = new StringBuilder();
601        CharsTrie.Iterator iterator = bytesTrie2.iterator();
602        while (iterator.hasNext()) {
603            CharsTrie.Entry bytesEntry = iterator.next();
604            buffer.append(Utility.hex(bytesEntry.chars))
605                    .append(keyValueSeparator)
606                    .append(bytesEntry.value)
607                    .append(itemSeparator);
608        }
609        return buffer.toString();
610    }
611
612}
613