1// © 2016 and later: Unicode, Inc. and others.
2// License & terms of use: http://www.unicode.org/copyright.html#License
3/*
4 *******************************************************************************
5 * Copyright (C) 2011, Google, International Business Machines Corporation and
6 * others. All Rights Reserved.
7 *******************************************************************************
8 */
9package com.ibm.icu.dev.test.util;
10
11import java.util.ArrayList;
12import java.util.HashMap;
13import java.util.Iterator;
14import java.util.List;
15import java.util.Map;
16import java.util.Map.Entry;
17
18import com.ibm.icu.impl.Utility;
19import com.ibm.icu.util.BytesTrie;
20import com.ibm.icu.util.BytesTrie.Result;
21import com.ibm.icu.util.BytesTrieBuilder;
22import com.ibm.icu.util.CharsTrie;
23import com.ibm.icu.util.CharsTrieBuilder;
24import com.ibm.icu.util.StringTrieBuilder.Option;
25
26// would be nice to have a BytesTrieBuilder.add(aByte);
27// question: can bytetrie store <"",x>?
28// can you store the same string twice, eg add(bytes1, value), add(bytes1, value)? What happens? If an error,
29// should happen on add, not on build.
30// the BytesTrieBuilder.build should create a BytesTrie, not a raw array. For the latter, use buildArray or something.
31// need class description; examples of usage; which method can/should be called after which others.
32
33public abstract class TrieMap<V> implements Iterable<Entry<CharSequence, V>> {
34
35    public enum Style {
36        BYTES, CHARS
37    }
38
39    private static final boolean DEBUG = true;
40
41    protected final V[] intToValue;
42    protected final int size;
43
44    protected TrieMap(V[] intToValue, int size) {
45        this.intToValue = intToValue;
46        this.size = size;
47    }
48
49    public int keyByteSize() {
50        return size;
51    }
52
53    public static abstract class Builder<V> {
54        Option option;
55        protected List<V> intToValueTemp = new ArrayList<V>();
56        protected Map<V, Integer> valueToIntegerTemp = new HashMap<V, Integer>();
57
58        static public <K extends CharSequence, V> Builder<V> with(Style style, Map<K, V> keyValuePairs) {
59            return with(style, Option.SMALL, keyValuePairs);
60        }
61
62        static public <K extends CharSequence, V> Builder<V> with(Style style, Option option, Map<K, V> keyValuePairs) {
63            Builder<V> result = style == Style.BYTES ? new BytesTrieMap.BytesBuilder<V>()
64                    : new CharsTrieMap.CharsBuilder<V>();
65            result.option = option;
66            return result.addAll(keyValuePairs);
67        }
68
69        static public <K extends CharSequence, V> Builder<V> with(Style style, K key, V value) {
70            return with(style, Option.SMALL, key, value);
71        }
72
73        static public <K extends CharSequence, V> Builder<V> with(Style style, Option option, K key, V value) {
74            Builder<V> result = style == Style.BYTES ? new BytesTrieMap.BytesBuilder<V>()
75                    : new CharsTrieMap.CharsBuilder<V>();
76            result.option = option;
77            return result.add(key, value);
78        }
79
80        public abstract Builder<V> add(CharSequence key, V value);
81
82        public abstract <K extends CharSequence> Builder<V> addAll(Map<K, V> keyValuePairs);
83
84        public abstract TrieMap<V> build();
85    }
86
87    abstract public V get(CharSequence test);
88
89    /**
90     * Warning: the entry contents are only valid until the next next() call!!
91     */
92    abstract public Iterator<Entry<CharSequence, V>> iterator();
93
94    abstract public Matcher<V> getMatcher();
95
96    public abstract static class Matcher<V> {
97        protected CharSequence text = "";
98        protected int start = 0;
99        protected int current = 0;
100
101        abstract void set(CharSequence string, int i);
102
103        abstract boolean next();
104
105        abstract V getValue();
106
107        abstract int getStart();
108
109        abstract int getEnd();
110
111        abstract boolean nextStart();
112    }
113
114    private static class BytesTrieMap<V> extends TrieMap<V> {
115        private final BytesTrie bytesTrie;
116        private byte[] bytes = new byte[3];
117
118        private BytesTrieMap(BytesTrie bytesTrie, V[] intToValue, int size) {
119            super(intToValue, size);
120            this.bytesTrie = bytesTrie;
121        }
122
123        public V get(CharSequence test) {
124            int length = test.length();
125            bytesTrie.reset();
126            if (length == 0) {
127                return bytesTrie.current().hasValue() ? intToValue[bytesTrie.getValue()] : null;
128            }
129            Result result = Result.NO_VALUE;
130            for (int i = 0; i < length; ++i) {
131                if (!result.hasNext()) {
132                    return null;
133                }
134                char c = test.charAt(i);
135                int limit = ByteConverter.getBytes(c, bytes, 0);
136                result = limit == 1 ? bytesTrie.next(bytes[0]) : bytesTrie.next(bytes, 0, limit);
137            }
138            return result.hasValue() ? intToValue[bytesTrie.getValue()] : null;
139
140            // int length = test.length();
141            // if (length == 0) {
142            // return null;
143            // }
144            // bytesTrie.reset();
145            // Result result = null;
146            // byte[] bytes = new byte[3];
147            // for (int i = 0; i < length; ++i) {
148            // char c = test.charAt(i);
149            // int limit = ByteConverter.getBytes(c, bytes, 0);
150            // for (int j = 0; j < limit; ++j) {
151            // result = bytesTrie.next(bytes[j]&0xFF);
152            // if (!result.matches()) {
153            // return null;
154            // }
155            // }
156            // }
157            // return result.hasValue() ? intToValue[bytesTrie.getValue()] : null;
158        }
159
160        public String toString() {
161            return toString(bytesTrie, " : ", "\n");
162        }
163
164        /**
165         * Warning: the entry contents are only valid until the next next() call!!
166         */
167        public Iterator<Entry<CharSequence, V>> iterator() {
168            // TODO Auto-generated method stub
169            return new BytesIterator();
170        }
171
172        public TrieMap.Matcher<V> getMatcher() {
173            return new BytesMatcher();
174        }
175
176        private class BytesIterator implements Iterator<Entry<CharSequence, V>> {
177            BytesTrie.Iterator iterator = bytesTrie.iterator();
178            BytesEntry entry = new BytesEntry();
179
180            public boolean hasNext() {
181                return iterator.hasNext();
182            }
183
184            public Entry<CharSequence, V> next() {
185                entry.bytesEntry = iterator.next();
186                return entry;
187            }
188
189            public void remove() {
190                throw new UnsupportedOperationException();
191            }
192        }
193
194        private class BytesEntry implements Entry<CharSequence, V> {
195            public BytesTrie.Entry bytesEntry;
196            StringBuilder buffer = new StringBuilder();
197
198            public CharSequence getKey() {
199                buffer.setLength(0);
200                ByteConverter.getChars(bytesEntry, buffer);
201                return buffer;
202            }
203
204            public V getValue() {
205                return intToValue[bytesEntry.value];
206            }
207
208            public V setValue(V value) {
209                throw new UnsupportedOperationException();
210            }
211        }
212
213        public class BytesMatcher extends Matcher<V> {
214            private byte[] bytes = new byte[3];
215            private V value = null;
216
217            public void set(CharSequence text, int start) {
218                this.text = text;
219                this.start = start;
220                this.current = start;
221            }
222
223            public int getStart() {
224                return start;
225            }
226
227            public int getEnd() {
228                return current;
229            }
230
231            /**
232             * Finds the next match. Returns false when there are no possible further matches from the current start
233             * point. Once that happens, call nextStart(); Call getValue to get the current value.
234             *
235             * @return false when done. There may be a value, however.
236             */
237            public boolean next() {
238                while (current < text.length()) {
239                    char c = text.charAt(current++);
240                    int limit = ByteConverter.getBytes(c, bytes, 0);
241                    for (int j = 0; j < limit; ++j) {
242                        Result result = bytesTrie.next(bytes[j]);
243                        if (result.hasValue()) {
244                            if (j < limit - 1) {
245                                throw new IllegalArgumentException("Data corrupt");
246                            }
247                            value = intToValue[bytesTrie.getValue()];
248                            return result.hasNext();
249                        } else if (!result.matches()) {
250                            value = null;
251                            return false;
252                        }
253                    }
254                }
255                value = null;
256                return false;
257            }
258
259            public boolean nextStart() {
260                if (start >= text.length()) {
261                    return false;
262                }
263                ++start;
264                current = start;
265                bytesTrie.reset();
266                return true;
267            }
268
269            public V getValue() {
270                return value;
271            }
272        }
273    }
274
275    public static class BytesBuilder<V> extends Builder<V> {
276        BytesTrieBuilder builder = new BytesTrieBuilder();
277        byte[] bytes = new byte[200];
278        List<String> debugBytes = DEBUG ? new ArrayList<String>() : null;
279
280        public BytesBuilder<V> add(CharSequence key, V value) {
281            // traverse the values, and get a mapping of a byte string to list of
282            // integers, and a mapping from those integers to a set of values
283            Integer index;
284            if (option == Option.SMALL) {
285                index = valueToIntegerTemp.get(value);
286                if (index == null) {
287                    index = intToValueTemp.size();
288                    intToValueTemp.add(value);
289                    valueToIntegerTemp.put(value, index);
290                }
291            } else {
292                index = intToValueTemp.size();
293                intToValueTemp.add(value);
294            }
295            // dumb implementation for now
296            // the buffer size is at most 3 * number_of_chars
297            if (bytes.length < key.length() * 3) {
298                bytes = new byte[64 + key.length() * 3];
299            }
300            int limit = 0;
301            for (int i = 0; i < key.length(); ++i) {
302                char c = key.charAt(i);
303                limit = ByteConverter.getBytes(c, bytes, limit);
304            }
305            try {
306                builder.add(bytes, limit, index);
307                return this;
308            } catch (Exception e) {
309                ArrayList<String> list = new ArrayList<String>();
310                for (int i = 0; i < limit; ++i) {
311                    list.add(Utility.hex(bytes[i]));
312                }
313                throw new IllegalArgumentException("Failed to add " + value + ", " + key + "=" + list, e);
314            }
315        }
316
317        public <K extends CharSequence> BytesBuilder<V> addAll(Map<K, V> keyValuePairs) {
318            for (Entry<K, V> entry : keyValuePairs.entrySet()) {
319                add(entry.getKey(), entry.getValue());
320            }
321            return this;
322        }
323
324        public TrieMap<V> build() {
325            int size = builder.buildByteBuffer(option).remaining();
326            BytesTrie bytesTrie = builder.build(option);
327            @SuppressWarnings("unchecked")
328            V[] intToValueArray = intToValueTemp.toArray((V[]) (new Object[intToValueTemp.size()]));
329            return new BytesTrieMap<V>(bytesTrie, intToValueArray, size);
330        }
331    }
332
333    private static class CharsTrieMap<V> extends TrieMap<V> {
334        private final CharsTrie charsTrie;
335
336        private CharsTrieMap(CharsTrie charsTrie, V[] intToValue, int size) {
337            super(intToValue, size);
338            this.charsTrie = charsTrie;
339        }
340
341        public V get(CharSequence test) {
342            Result result = charsTrie.reset().next(test, 0, test.length());
343            return result.hasValue() ? intToValue[charsTrie.getValue()] : null;
344        }
345
346        public String toString() {
347            return toString(charsTrie, " : ", "\n");
348        }
349
350        /**
351         * Warning: the entry contents are only valid until the next next() call!!
352         */
353        public Iterator<Entry<CharSequence, V>> iterator() {
354            // TODO Auto-generated method stub
355            return new CharsIterator();
356        }
357
358        public TrieMap.Matcher<V> getMatcher() {
359            return new CharsMatcher();
360        }
361
362        private class CharsIterator implements Iterator<Entry<CharSequence, V>> {
363            CharsTrie.Iterator iterator = charsTrie.iterator();
364            CharsEntry entry = new CharsEntry();
365
366            public boolean hasNext() {
367                return iterator.hasNext();
368            }
369
370            public Entry<CharSequence, V> next() {
371                entry.charsEntry = iterator.next();
372                return entry;
373            }
374
375            public void remove() {
376                throw new UnsupportedOperationException();
377            }
378
379        }
380
381        private class CharsEntry implements Entry<CharSequence, V> {
382            public CharsTrie.Entry charsEntry;
383
384            public CharSequence getKey() {
385                return charsEntry.chars;
386            }
387
388            public V getValue() {
389                return intToValue[charsEntry.value];
390            }
391
392            public V setValue(V value) {
393                throw new UnsupportedOperationException();
394            }
395        }
396
397        public class CharsMatcher extends Matcher<V> {
398            private V value = null;
399
400            public void set(CharSequence text, int start) {
401                this.text = text;
402                this.start = start;
403                this.current = start;
404            }
405
406            public int getStart() {
407                return start;
408            }
409
410            public int getEnd() {
411                return current;
412            }
413
414            /**
415             * Finds the next match. Returns false when there are no possible further matches from the current start
416             * point. Once that happens, call nextStart(); Call getValue to get the current value.
417             *
418             * @return false when done. There may be a value, however.
419             */
420            public boolean next() {
421                while (current < text.length()) {
422                    char c = text.charAt(current++);
423                    Result result = charsTrie.next(c);
424                    if (result.hasValue()) {
425                        value = intToValue[charsTrie.getValue()];
426                        return result.hasNext();
427                    } else if (!result.matches()) {
428                        value = null;
429                        return false;
430
431                    }
432                }
433                value = null;
434                return false;
435            }
436
437            public boolean nextStart() {
438                if (start >= text.length()) {
439                    return false;
440                }
441                ++start;
442                current = start;
443                charsTrie.reset();
444                return true;
445            }
446
447            public V getValue() {
448                return value;
449            }
450        }
451    }
452
453    public static class CharsBuilder<V> extends Builder<V> {
454        CharsTrieBuilder builder = new CharsTrieBuilder();
455
456        public CharsBuilder<V> add(CharSequence key, V value) {
457            // traverse the values, and get a mapping of a byte string to list of
458            // integers, and a mapping from those integers to a set of values
459            Integer index;
460            if (option == Option.SMALL) {
461                index = valueToIntegerTemp.get(value);
462                if (index == null) {
463                    index = intToValueTemp.size();
464                    intToValueTemp.add(value);
465                    valueToIntegerTemp.put(value, index);
466                }
467            } else {
468                index = intToValueTemp.size();
469                intToValueTemp.add(value);
470            }
471            try {
472                builder.add(key.toString(), index);
473                return this;
474            } catch (Exception e) {
475                throw new IllegalArgumentException("Failed to add " + value + ", " + key + "=" + Utility.hex(key), e);
476            }
477        }
478
479        public <K extends CharSequence> CharsBuilder<V> addAll(Map<K, V> keyValuePairs) {
480            for (Entry<K, V> entry : keyValuePairs.entrySet()) {
481                add(entry.getKey(), entry.getValue());
482            }
483            return this;
484        }
485
486        public TrieMap<V> build() {
487            CharSequence buildCharSequence = builder.buildCharSequence(option);
488            int size = 2 * buildCharSequence.length();
489            // CharsTrie charsTrie = builder.build(option);
490            CharsTrie charsTrie = new CharsTrie(buildCharSequence, 0);
491            @SuppressWarnings("unchecked")
492            V[] intToValueArray = intToValueTemp.toArray((V[]) (new Object[intToValueTemp.size()]));
493            return new CharsTrieMap<V>(charsTrie, intToValueArray, size);
494        }
495    }
496
497    /**
498     * Supports the following format for encoding chars (Unicode 16-bit code units). The format is slightly simpler and
499     * more compact than UTF8, but also maintains ordering. It is not, however self-synchronizing, and is not intended
500     * for general usage
501     *
502     * <pre>
503     * 0000..007F - 0xxx xxxx
504     * 0000..7EFF - 1yyy yyyy xxxx xxxx
505     * 7F00..FFFF - 1111 1111 yyyy yyyy xxxx xxxx
506     * </pre>
507     */
508    static class ByteConverter {
509        public static int getBytes(char source, byte[] bytes, int limit) {
510            if (source < 0x80) {
511                bytes[limit++] = (byte) source;
512            } else if (source < 0x7F00) {
513                bytes[limit++] = (byte) (0x80 | (source >> 8));
514                bytes[limit++] = (byte) source;
515            } else {
516                bytes[limit++] = (byte) -1;
517                bytes[limit++] = (byte) (source >> 8);
518                bytes[limit++] = (byte) source;
519            }
520            return limit;
521        }
522
523        /**
524         * Transform the string into a sequence of bytes, appending them after start, and return the new limit.
525         */
526        public static int getBytes(CharSequence source, byte[] bytes, int limit) {
527            for (int i = 0; i < source.length(); ++i) {
528                limit = getBytes(source.charAt(i), bytes, limit);
529            }
530            return limit;
531        }
532
533        /**
534         * Transform a sequence of bytes into a string, according to the format in getBytes. No error checking.
535         */
536        public static String getChars(byte[] bytes, int start, int limit) {
537            StringBuilder buffer = new StringBuilder();
538            char[] output = new char[1];
539            for (int i = start; i < limit;) {
540                i = getChar(bytes, i, output);
541                buffer.append(output[0]);
542            }
543            return buffer.toString();
544        }
545
546        public static int getChar(byte[] bytes, int start, char[] output) {
547            byte b = bytes[start++];
548            if (b >= 0) {
549                output[0] = (char) b;
550            } else if (b != (byte) -1) { // 2 bytes
551                int b1 = 0x7F & b;
552                int b2 = 0xFF & bytes[start++];
553                output[0] = (char) ((b1 << 8) | b2);
554            } else {
555                int b2 = bytes[start++];
556                int b3 = 0xFF & bytes[start++];
557                output[0] = (char) ((b2 << 8) | b3);
558            }
559            return start;
560        }
561
562        private static void getChars(BytesTrie.Entry entry, StringBuilder stringBuilder) {
563            int len = entry.bytesLength();
564            for (int i = 0; i < len;) {
565                byte b = entry.byteAt(i++);
566                if (b >= 0) {
567                    stringBuilder.append((char) b);
568                } else if (b != (byte) -1) { // 2 bytes
569                    int b1 = 0x7F & b;
570                    int b2 = 0xFF & entry.byteAt(i++);
571                    stringBuilder.append((char) ((b1 << 8) | b2));
572                } else {
573                    int b2 = entry.byteAt(i++);
574                    int b3 = 0xFF & entry.byteAt(i++);
575                    stringBuilder.append((char) ((b2 << 8) | b3));
576                }
577            }
578        }
579    }
580
581    public static String toString(BytesTrie bytesTrie2) {
582        return toString(bytesTrie2, " : ", "\n");
583    }
584
585    public static String toString(BytesTrie bytesTrie2, String keyValueSeparator, String itemSeparator) {
586        StringBuilder buffer = new StringBuilder();
587        BytesTrie.Iterator iterator = bytesTrie2.iterator();
588        while (iterator.hasNext()) {
589            BytesTrie.Entry bytesEntry = iterator.next();
590            int len = bytesEntry.bytesLength();
591            byte[] bytes = new byte[len];
592            bytesEntry.copyBytesTo(bytes, 0);
593            buffer.append(Utility.hex(bytes, 0, len, " "))
594                    .append(keyValueSeparator)
595                    .append(bytesEntry.value)
596                    .append(itemSeparator);
597        }
598        return buffer.toString();
599    }
600
601    public static String toString(CharsTrie bytesTrie2, String keyValueSeparator, String itemSeparator) {
602        StringBuilder buffer = new StringBuilder();
603        CharsTrie.Iterator iterator = bytesTrie2.iterator();
604        while (iterator.hasNext()) {
605            CharsTrie.Entry bytesEntry = iterator.next();
606            buffer.append(Utility.hex(bytesEntry.chars))
607                    .append(keyValueSeparator)
608                    .append(bytesEntry.value)
609                    .append(itemSeparator);
610        }
611        return buffer.toString();
612    }
613
614}
615