1// © 2016 and later: Unicode, Inc. and others.
2// License & terms of use: http://www.unicode.org/copyright.html#License
3/*
4 * ********************************************************************************
5 * Copyright (C) 2007-2011, International Business Machines Corporation and others.
6 * All Rights Reserved.
7 * ********************************************************************************
8 */
9package com.ibm.icu.impl;
10
11import java.util.Iterator;
12import java.util.LinkedList;
13import java.util.List;
14import java.util.ListIterator;
15
16import com.ibm.icu.lang.UCharacter;
17
18/**
19 * TextTrieMap is a trie implementation for supporting
20 * fast prefix match for the key.
21 */
22public class TextTrieMap<V> {
23
24    private Node _root = new Node();
25    boolean _ignoreCase;
26
27    /**
28     * Constructs a TextTrieMap object.
29     *
30     * @param ignoreCase true to use simple case insensitive match
31     */
32    public TextTrieMap(boolean ignoreCase) {
33        _ignoreCase = ignoreCase;
34    }
35
36    /**
37     * Adds the text key and its associated object in this object.
38     *
39     * @param text The text.
40     * @param val The value object associated with the text.
41     */
42    public TextTrieMap<V> put(CharSequence text, V val) {
43        CharIterator chitr = new CharIterator(text, 0, _ignoreCase);
44        _root.add(chitr, val);
45        return this;
46    }
47
48    /**
49     * Gets an iterator of the objects associated with the
50     * longest prefix matching string key.
51     *
52     * @param text The text to be matched with prefixes.
53     * @return An iterator of the objects associated with
54     * the longest prefix matching matching key, or null
55     * if no matching entry is found.
56     */
57    public Iterator<V> get(String text) {
58        return get(text, 0);
59    }
60
61    /**
62     * Gets an iterator of the objects associated with the
63     * longest prefix matching string key starting at the
64     * specified position.
65     *
66     * @param text The text to be matched with prefixes.
67     * @param start The start index of of the text
68     * @return An iterator of the objects associated with the
69     * longest prefix matching matching key, or null if no
70     * matching entry is found.
71     */
72    public Iterator<V> get(CharSequence text, int start) {
73        return get(text, start, null);
74    }
75
76    public Iterator<V> get(CharSequence text, int start, int[] matchLen) {
77        LongestMatchHandler<V> handler = new LongestMatchHandler<V>();
78        find(text, start, handler);
79        if (matchLen != null && matchLen.length > 0) {
80            matchLen[0] = handler.getMatchLength();
81        }
82        return handler.getMatches();
83    }
84
85    public void find(CharSequence text, ResultHandler<V> handler) {
86        find(text, 0, handler);
87    }
88
89    public void find(CharSequence text, int offset, ResultHandler<V> handler) {
90        CharIterator chitr = new CharIterator(text, offset, _ignoreCase);
91        find(_root, chitr, handler);
92    }
93
94    private synchronized void find(Node node, CharIterator chitr, ResultHandler<V> handler) {
95        Iterator<V> values = node.values();
96        if (values != null) {
97            if (!handler.handlePrefixMatch(chitr.processedLength(), values)) {
98                return;
99            }
100        }
101
102        Node nextMatch = node.findMatch(chitr);
103        if (nextMatch != null) {
104            find(nextMatch, chitr, handler);
105        }
106    }
107
108    public static class CharIterator implements Iterator<Character> {
109        private boolean _ignoreCase;
110        private CharSequence _text;
111        private int _nextIdx;
112        private int _startIdx;
113
114        private Character _remainingChar;
115
116        CharIterator(CharSequence text, int offset, boolean ignoreCase) {
117            _text = text;
118            _nextIdx = _startIdx = offset;
119            _ignoreCase = ignoreCase;
120        }
121
122        /* (non-Javadoc)
123         * @see java.util.Iterator#hasNext()
124         */
125        @Override
126        public boolean hasNext() {
127            if (_nextIdx == _text.length() && _remainingChar == null) {
128                return false;
129            }
130            return true;
131        }
132
133        /* (non-Javadoc)
134         * @see java.util.Iterator#next()
135         */
136        @Override
137        public Character next() {
138            if (_nextIdx == _text.length() && _remainingChar == null) {
139                return null;
140            }
141            Character next;
142            if (_remainingChar != null) {
143                next = _remainingChar;
144                _remainingChar = null;
145            } else {
146                if (_ignoreCase) {
147                    int cp = UCharacter.foldCase(Character.codePointAt(_text, _nextIdx), true);
148                    _nextIdx = _nextIdx + Character.charCount(cp);
149
150                    char[] chars = Character.toChars(cp);
151                    next = chars[0];
152                    if (chars.length == 2) {
153                        _remainingChar = chars[1];
154                    }
155                } else {
156                    next = _text.charAt(_nextIdx);
157                    _nextIdx++;
158                }
159            }
160            return next;
161        }
162
163        /* (non-Javadoc)
164         * @see java.util.Iterator#remove()
165         */
166        @Override
167        public void remove() {
168            throw new UnsupportedOperationException("remove() not supproted");
169        }
170
171        public int nextIndex() {
172            return _nextIdx;
173        }
174
175        public int processedLength() {
176            if (_remainingChar != null) {
177                throw new IllegalStateException("In the middle of surrogate pair");
178            }
179            return _nextIdx - _startIdx;
180        }
181    }
182
183    /**
184     * Callback handler for processing prefix matches used by
185     * find method.
186     */
187    public interface ResultHandler<V> {
188        /**
189         * Handles a prefix key match
190         *
191         * @param matchLength Matched key's length
192         * @param values An iterator of the objects associated with the matched key
193         * @return Return true to continue the search in the trie, false to quit.
194         */
195        public boolean handlePrefixMatch(int matchLength, Iterator<V> values);
196    }
197
198    private static class LongestMatchHandler<V> implements ResultHandler<V> {
199        private Iterator<V> matches = null;
200        private int length = 0;
201
202        @Override
203        public boolean handlePrefixMatch(int matchLength, Iterator<V> values) {
204            if (matchLength > length) {
205                length = matchLength;
206                matches = values;
207            }
208            return true;
209        }
210
211        public Iterator<V> getMatches() {
212            return matches;
213        }
214
215        public int getMatchLength() {
216            return length;
217        }
218    }
219
220    /**
221     * Inner class representing a text node in the trie.
222     */
223    private class Node {
224        private char[] _text;
225        private List<V> _values;
226        private List<Node> _children;
227
228        private Node() {
229        }
230
231        private Node(char[] text, List<V> values, List<Node> children) {
232            _text = text;
233            _values = values;
234            _children = children;
235        }
236
237        public Iterator<V> values() {
238            if (_values == null) {
239                return null;
240            }
241            return _values.iterator();
242        }
243
244        public void add(CharIterator chitr, V value) {
245            StringBuilder buf = new StringBuilder();
246            while (chitr.hasNext()) {
247                buf.append(chitr.next());
248            }
249            add(toCharArray(buf), 0, value);
250        }
251
252        public Node findMatch(CharIterator chitr) {
253            if (_children == null) {
254                return null;
255            }
256            if (!chitr.hasNext()) {
257                return null;
258            }
259            Node match = null;
260            Character ch = chitr.next();
261            for (Node child : _children) {
262                if (ch < child._text[0]) {
263                    break;
264                }
265                if (ch == child._text[0]) {
266                    if (child.matchFollowing(chitr)) {
267                        match = child;
268                    }
269                    break;
270                }
271            }
272            return match;
273        }
274
275        private void add(char[] text, int offset, V value) {
276            if (text.length == offset) {
277                _values = addValue(_values, value);
278                return;
279            }
280
281            if (_children == null) {
282                _children = new LinkedList<Node>();
283                Node child = new Node(subArray(text, offset), addValue(null, value), null);
284                _children.add(child);
285                return;
286            }
287
288            // walk through children
289            ListIterator<Node> litr = _children.listIterator();
290            while (litr.hasNext()) {
291                Node next = litr.next();
292                if (text[offset] < next._text[0]) {
293                    litr.previous();
294                    break;
295                }
296                if (text[offset] == next._text[0]) {
297                    int matchLen = next.lenMatches(text, offset);
298                    if (matchLen == next._text.length) {
299                        // full match
300                        next.add(text, offset + matchLen, value);
301                    } else {
302                        // partial match, create a branch
303                        next.split(matchLen);
304                        next.add(text, offset + matchLen, value);
305                    }
306                    return;
307                }
308            }
309            // add a new child to this node
310            litr.add(new Node(subArray(text, offset), addValue(null, value), null));
311        }
312
313        private boolean matchFollowing(CharIterator chitr) {
314            boolean matched = true;
315            int idx = 1;
316            while (idx < _text.length) {
317                if(!chitr.hasNext()) {
318                    matched = false;
319                    break;
320                }
321                Character ch = chitr.next();
322                if (ch != _text[idx]) {
323                    matched = false;
324                    break;
325                }
326                idx++;
327            }
328            return matched;
329        }
330
331        private int lenMatches(char[] text, int offset) {
332            int textLen = text.length - offset;
333            int limit = _text.length < textLen ? _text.length : textLen;
334            int len = 0;
335            while (len < limit) {
336                if (_text[len] != text[offset + len]) {
337                    break;
338                }
339                len++;
340            }
341            return len;
342        }
343
344        private void split(int offset) {
345            // split the current node at the offset
346            char[] childText = subArray(_text, offset);
347            _text = subArray(_text, 0, offset);
348
349            // add the Node representing after the offset as a child
350            Node child = new Node(childText, _values, _children);
351            _values = null;
352
353            _children = new LinkedList<Node>();
354            _children.add(child);
355        }
356
357        private List<V> addValue(List<V> list, V value) {
358            if (list == null) {
359                list = new LinkedList<V>();
360            }
361            list.add(value);
362            return list;
363        }
364    }
365
366    private static char[] toCharArray(CharSequence text) {
367        char[] array = new char[text.length()];
368        for (int i = 0; i < array.length; i++) {
369            array[i] = text.charAt(i);
370        }
371        return array;
372    }
373
374    private static char[] subArray(char[] array, int start) {
375        if (start == 0) {
376            return array;
377        }
378        char[] sub = new char[array.length - start];
379        System.arraycopy(array, start, sub, 0, sub.length);
380        return sub;
381    }
382
383    private static char[] subArray(char[] array, int start, int limit) {
384        if (start == 0 && limit == array.length) {
385            return array;
386        }
387        char[] sub = new char[limit - start];
388        System.arraycopy(array, start, sub, 0, limit - start);
389        return sub;
390    }
391}
392