1// © 2016 and later: Unicode, Inc. and others. 2// License & terms of use: http://www.unicode.org/copyright.html#License 3/* 4 * ******************************************************************************** 5 * Copyright (C) 2007-2011, International Business Machines Corporation and others. 6 * All Rights Reserved. 7 * ******************************************************************************** 8 */ 9package com.ibm.icu.impl; 10 11import java.util.Iterator; 12import java.util.LinkedList; 13import java.util.List; 14import java.util.ListIterator; 15 16import com.ibm.icu.lang.UCharacter; 17 18/** 19 * TextTrieMap is a trie implementation for supporting 20 * fast prefix match for the key. 21 */ 22public class TextTrieMap<V> { 23 24 private Node _root = new Node(); 25 boolean _ignoreCase; 26 27 /** 28 * Constructs a TextTrieMap object. 29 * 30 * @param ignoreCase true to use simple case insensitive match 31 */ 32 public TextTrieMap(boolean ignoreCase) { 33 _ignoreCase = ignoreCase; 34 } 35 36 /** 37 * Adds the text key and its associated object in this object. 38 * 39 * @param text The text. 40 * @param val The value object associated with the text. 41 */ 42 public TextTrieMap<V> put(CharSequence text, V val) { 43 CharIterator chitr = new CharIterator(text, 0, _ignoreCase); 44 _root.add(chitr, val); 45 return this; 46 } 47 48 /** 49 * Gets an iterator of the objects associated with the 50 * longest prefix matching string key. 51 * 52 * @param text The text to be matched with prefixes. 53 * @return An iterator of the objects associated with 54 * the longest prefix matching matching key, or null 55 * if no matching entry is found. 56 */ 57 public Iterator<V> get(String text) { 58 return get(text, 0); 59 } 60 61 /** 62 * Gets an iterator of the objects associated with the 63 * longest prefix matching string key starting at the 64 * specified position. 65 * 66 * @param text The text to be matched with prefixes. 67 * @param start The start index of of the text 68 * @return An iterator of the objects associated with the 69 * longest prefix matching matching key, or null if no 70 * matching entry is found. 71 */ 72 public Iterator<V> get(CharSequence text, int start) { 73 return get(text, start, null); 74 } 75 76 public Iterator<V> get(CharSequence text, int start, int[] matchLen) { 77 LongestMatchHandler<V> handler = new LongestMatchHandler<V>(); 78 find(text, start, handler); 79 if (matchLen != null && matchLen.length > 0) { 80 matchLen[0] = handler.getMatchLength(); 81 } 82 return handler.getMatches(); 83 } 84 85 public void find(CharSequence text, ResultHandler<V> handler) { 86 find(text, 0, handler); 87 } 88 89 public void find(CharSequence text, int offset, ResultHandler<V> handler) { 90 CharIterator chitr = new CharIterator(text, offset, _ignoreCase); 91 find(_root, chitr, handler); 92 } 93 94 private synchronized void find(Node node, CharIterator chitr, ResultHandler<V> handler) { 95 Iterator<V> values = node.values(); 96 if (values != null) { 97 if (!handler.handlePrefixMatch(chitr.processedLength(), values)) { 98 return; 99 } 100 } 101 102 Node nextMatch = node.findMatch(chitr); 103 if (nextMatch != null) { 104 find(nextMatch, chitr, handler); 105 } 106 } 107 108 public static class CharIterator implements Iterator<Character> { 109 private boolean _ignoreCase; 110 private CharSequence _text; 111 private int _nextIdx; 112 private int _startIdx; 113 114 private Character _remainingChar; 115 116 CharIterator(CharSequence text, int offset, boolean ignoreCase) { 117 _text = text; 118 _nextIdx = _startIdx = offset; 119 _ignoreCase = ignoreCase; 120 } 121 122 /* (non-Javadoc) 123 * @see java.util.Iterator#hasNext() 124 */ 125 @Override 126 public boolean hasNext() { 127 if (_nextIdx == _text.length() && _remainingChar == null) { 128 return false; 129 } 130 return true; 131 } 132 133 /* (non-Javadoc) 134 * @see java.util.Iterator#next() 135 */ 136 @Override 137 public Character next() { 138 if (_nextIdx == _text.length() && _remainingChar == null) { 139 return null; 140 } 141 Character next; 142 if (_remainingChar != null) { 143 next = _remainingChar; 144 _remainingChar = null; 145 } else { 146 if (_ignoreCase) { 147 int cp = UCharacter.foldCase(Character.codePointAt(_text, _nextIdx), true); 148 _nextIdx = _nextIdx + Character.charCount(cp); 149 150 char[] chars = Character.toChars(cp); 151 next = chars[0]; 152 if (chars.length == 2) { 153 _remainingChar = chars[1]; 154 } 155 } else { 156 next = _text.charAt(_nextIdx); 157 _nextIdx++; 158 } 159 } 160 return next; 161 } 162 163 /* (non-Javadoc) 164 * @see java.util.Iterator#remove() 165 */ 166 @Override 167 public void remove() { 168 throw new UnsupportedOperationException("remove() not supproted"); 169 } 170 171 public int nextIndex() { 172 return _nextIdx; 173 } 174 175 public int processedLength() { 176 if (_remainingChar != null) { 177 throw new IllegalStateException("In the middle of surrogate pair"); 178 } 179 return _nextIdx - _startIdx; 180 } 181 } 182 183 /** 184 * Callback handler for processing prefix matches used by 185 * find method. 186 */ 187 public interface ResultHandler<V> { 188 /** 189 * Handles a prefix key match 190 * 191 * @param matchLength Matched key's length 192 * @param values An iterator of the objects associated with the matched key 193 * @return Return true to continue the search in the trie, false to quit. 194 */ 195 public boolean handlePrefixMatch(int matchLength, Iterator<V> values); 196 } 197 198 private static class LongestMatchHandler<V> implements ResultHandler<V> { 199 private Iterator<V> matches = null; 200 private int length = 0; 201 202 @Override 203 public boolean handlePrefixMatch(int matchLength, Iterator<V> values) { 204 if (matchLength > length) { 205 length = matchLength; 206 matches = values; 207 } 208 return true; 209 } 210 211 public Iterator<V> getMatches() { 212 return matches; 213 } 214 215 public int getMatchLength() { 216 return length; 217 } 218 } 219 220 /** 221 * Inner class representing a text node in the trie. 222 */ 223 private class Node { 224 private char[] _text; 225 private List<V> _values; 226 private List<Node> _children; 227 228 private Node() { 229 } 230 231 private Node(char[] text, List<V> values, List<Node> children) { 232 _text = text; 233 _values = values; 234 _children = children; 235 } 236 237 public Iterator<V> values() { 238 if (_values == null) { 239 return null; 240 } 241 return _values.iterator(); 242 } 243 244 public void add(CharIterator chitr, V value) { 245 StringBuilder buf = new StringBuilder(); 246 while (chitr.hasNext()) { 247 buf.append(chitr.next()); 248 } 249 add(toCharArray(buf), 0, value); 250 } 251 252 public Node findMatch(CharIterator chitr) { 253 if (_children == null) { 254 return null; 255 } 256 if (!chitr.hasNext()) { 257 return null; 258 } 259 Node match = null; 260 Character ch = chitr.next(); 261 for (Node child : _children) { 262 if (ch < child._text[0]) { 263 break; 264 } 265 if (ch == child._text[0]) { 266 if (child.matchFollowing(chitr)) { 267 match = child; 268 } 269 break; 270 } 271 } 272 return match; 273 } 274 275 private void add(char[] text, int offset, V value) { 276 if (text.length == offset) { 277 _values = addValue(_values, value); 278 return; 279 } 280 281 if (_children == null) { 282 _children = new LinkedList<Node>(); 283 Node child = new Node(subArray(text, offset), addValue(null, value), null); 284 _children.add(child); 285 return; 286 } 287 288 // walk through children 289 ListIterator<Node> litr = _children.listIterator(); 290 while (litr.hasNext()) { 291 Node next = litr.next(); 292 if (text[offset] < next._text[0]) { 293 litr.previous(); 294 break; 295 } 296 if (text[offset] == next._text[0]) { 297 int matchLen = next.lenMatches(text, offset); 298 if (matchLen == next._text.length) { 299 // full match 300 next.add(text, offset + matchLen, value); 301 } else { 302 // partial match, create a branch 303 next.split(matchLen); 304 next.add(text, offset + matchLen, value); 305 } 306 return; 307 } 308 } 309 // add a new child to this node 310 litr.add(new Node(subArray(text, offset), addValue(null, value), null)); 311 } 312 313 private boolean matchFollowing(CharIterator chitr) { 314 boolean matched = true; 315 int idx = 1; 316 while (idx < _text.length) { 317 if(!chitr.hasNext()) { 318 matched = false; 319 break; 320 } 321 Character ch = chitr.next(); 322 if (ch != _text[idx]) { 323 matched = false; 324 break; 325 } 326 idx++; 327 } 328 return matched; 329 } 330 331 private int lenMatches(char[] text, int offset) { 332 int textLen = text.length - offset; 333 int limit = _text.length < textLen ? _text.length : textLen; 334 int len = 0; 335 while (len < limit) { 336 if (_text[len] != text[offset + len]) { 337 break; 338 } 339 len++; 340 } 341 return len; 342 } 343 344 private void split(int offset) { 345 // split the current node at the offset 346 char[] childText = subArray(_text, offset); 347 _text = subArray(_text, 0, offset); 348 349 // add the Node representing after the offset as a child 350 Node child = new Node(childText, _values, _children); 351 _values = null; 352 353 _children = new LinkedList<Node>(); 354 _children.add(child); 355 } 356 357 private List<V> addValue(List<V> list, V value) { 358 if (list == null) { 359 list = new LinkedList<V>(); 360 } 361 list.add(value); 362 return list; 363 } 364 } 365 366 private static char[] toCharArray(CharSequence text) { 367 char[] array = new char[text.length()]; 368 for (int i = 0; i < array.length; i++) { 369 array[i] = text.charAt(i); 370 } 371 return array; 372 } 373 374 private static char[] subArray(char[] array, int start) { 375 if (start == 0) { 376 return array; 377 } 378 char[] sub = new char[array.length - start]; 379 System.arraycopy(array, start, sub, 0, sub.length); 380 return sub; 381 } 382 383 private static char[] subArray(char[] array, int start, int limit) { 384 if (start == 0 && limit == array.length) { 385 return array; 386 } 387 char[] sub = new char[limit - start]; 388 System.arraycopy(array, start, sub, 0, limit - start); 389 return sub; 390 } 391} 392