17935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert/* 27935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert******************************************************************************* 37935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert* Copyright (C) 2010-2014, International Business Machines 47935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert* Corporation and others. All Rights Reserved. 57935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert******************************************************************************* 67935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert* created on: 2010nov23 77935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert* created by: Markus W. Scherer 87935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert* ported from ICU4C bytestrie.h/.cpp 97935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert*/ 107935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubertpackage com.ibm.icu.util; 117935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert 127935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubertimport java.io.IOException; 137935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubertimport java.nio.ByteBuffer; 147935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubertimport java.util.ArrayList; 157935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubertimport java.util.NoSuchElementException; 167935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert 177935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert/** 187935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert * Light-weight, non-const reader class for a BytesTrie. 197935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert * Traverses a byte-serialized data structure with minimal state, 207935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert * for mapping byte sequences to non-negative integer values. 217935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert * 227935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert * <p>This class is not intended for public subclassing. 237935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert * 247935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert * @stable ICU 4.8 257935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert * @author Markus W. Scherer 267935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert */ 277935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubertpublic final class BytesTrie implements Cloneable, Iterable<BytesTrie.Entry> { 287935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert /** 297935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert * Constructs a BytesTrie reader instance. 307935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert * 317935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert * <p>The array must contain a copy of a byte sequence from the BytesTrieBuilder, 327935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert * with the offset indicating the first byte of that sequence. 337935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert * The BytesTrie object will not read more bytes than 347935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert * the BytesTrieBuilder generated in the corresponding build() call. 357935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert * 367935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert * <p>The array is not copied/cloned and must not be modified while 377935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert * the BytesTrie object is in use. 387935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert * 397935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert * @param trieBytes Bytes array that contains the serialized trie. 407935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert * @param offset Root offset of the trie in the array. 417935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert * @stable ICU 4.8 427935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert */ 437935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert public BytesTrie(byte[] trieBytes, int offset) { 447935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert bytes_=trieBytes; 457935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert pos_=root_=offset; 467935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert remainingMatchLength_=-1; 477935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert } 487935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert 497935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert /** 507935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert * Clones this trie reader object and its state, 517935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert * but not the byte array which will be shared. 527935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert * @return A shallow clone of this trie. 537935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert * @stable ICU 4.8 547935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert */ 557935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert @Override 567935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert public Object clone() throws CloneNotSupportedException { 577935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert return super.clone(); // A shallow copy is just what we need. 587935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert } 597935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert 607935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert /** 617935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert * Resets this trie to its initial state. 627935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert * @return this 637935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert * @stable ICU 4.8 647935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert */ 657935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert public BytesTrie reset() { 667935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert pos_=root_; 677935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert remainingMatchLength_=-1; 687935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert return this; 697935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert } 707935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert 717935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert /** 727935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert * BytesTrie state object, for saving a trie's current state 737935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert * and resetting the trie back to this state later. 747935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert * @stable ICU 4.8 757935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert */ 767935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert public static final class State { 777935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert /** 787935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert * Constructs an empty State. 797935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert * @stable ICU 4.8 807935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert */ 817935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert public State() {} 827935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert private byte[] bytes; 837935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert private int root; 847935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert private int pos; 857935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert private int remainingMatchLength; 867935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert } 877935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert 887935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert /** 897935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert * Saves the state of this trie. 907935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert * @param state The State object to hold the trie's state. 917935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert * @return this 927935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert * @see #resetToState 937935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert * @stable ICU 4.8 947935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert */ 957935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert public BytesTrie saveState(State state) /*const*/ { 967935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert state.bytes=bytes_; 977935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert state.root=root_; 987935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert state.pos=pos_; 997935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert state.remainingMatchLength=remainingMatchLength_; 1007935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert return this; 1017935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert } 1027935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert 1037935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert /** 1047935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert * Resets this trie to the saved state. 1057935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert * @param state The State object which holds a saved trie state. 1067935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert * @return this 1077935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert * @throws IllegalArgumentException if the state object contains no state, 1087935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert * or the state of a different trie 1097935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert * @see #saveState 1107935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert * @see #reset 1117935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert * @stable ICU 4.8 1127935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert */ 1137935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert public BytesTrie resetToState(State state) { 1147935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert if(bytes_==state.bytes && bytes_!=null && root_==state.root) { 1157935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert pos_=state.pos; 1167935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert remainingMatchLength_=state.remainingMatchLength; 1177935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert } else { 1187935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert throw new IllegalArgumentException("incompatible trie state"); 1197935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert } 1207935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert return this; 1217935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert } 1227935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert 1237935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert /** 1247935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert * Return values for BytesTrie.next(), CharsTrie.next() and similar methods. 1257935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert * @stable ICU 4.8 1267935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert */ 1277935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert public enum Result { 1287935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert /** 1297935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert * The input unit(s) did not continue a matching string. 1307935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert * Once current()/next() return NO_MATCH, 1317935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert * all further calls to current()/next() will also return NO_MATCH, 1327935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert * until the trie is reset to its original state or to a saved state. 1337935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert * @stable ICU 4.8 1347935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert */ 1357935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert NO_MATCH, 1367935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert /** 1377935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert * The input unit(s) continued a matching string 1387935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert * but there is no value for the string so far. 1397935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert * (It is a prefix of a longer string.) 1407935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert * @stable ICU 4.8 1417935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert */ 1427935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert NO_VALUE, 1437935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert /** 1447935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert * The input unit(s) continued a matching string 1457935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert * and there is a value for the string so far. 1467935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert * This value will be returned by getValue(). 1477935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert * No further input byte/unit can continue a matching string. 1487935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert * @stable ICU 4.8 1497935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert */ 1507935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert FINAL_VALUE, 1517935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert /** 1527935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert * The input unit(s) continued a matching string 1537935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert * and there is a value for the string so far. 1547935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert * This value will be returned by getValue(). 1557935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert * Another input byte/unit can continue a matching string. 1567935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert * @stable ICU 4.8 1577935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert */ 1587935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert INTERMEDIATE_VALUE; 1597935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert 1607935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert // Note: The following methods assume the particular order 1617935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert // of enum constants, treating the ordinal() values like bit sets. 1627935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert // Do not reorder the enum constants! 1637935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert 1647935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert /** 1657935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert * Same as (result!=NO_MATCH). 1667935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert * @return true if the input bytes/units so far are part of a matching string/byte sequence. 1677935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert * @stable ICU 4.8 1687935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert */ 1697935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert public boolean matches() { return this!=NO_MATCH; } 1707935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert 1717935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert /** 1727935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert * Equivalent to (result==INTERMEDIATE_VALUE || result==FINAL_VALUE). 1737935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert * @return true if there is a value for the input bytes/units so far. 1747935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert * @see #getValue 1757935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert * @stable ICU 4.8 1767935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert */ 1777935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert public boolean hasValue() { return ordinal()>=2; } 1787935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert 1797935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert /** 1807935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert * Equivalent to (result==NO_VALUE || result==INTERMEDIATE_VALUE). 1817935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert * @return true if another input byte/unit can continue a matching string. 1827935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert * @stable ICU 4.8 1837935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert */ 1847935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert public boolean hasNext() { return (ordinal()&1)!=0; } 1857935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert } 1867935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert 1877935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert /** 1887935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert * Determines whether the byte sequence so far matches, whether it has a value, 1897935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert * and whether another input byte can continue a matching byte sequence. 1907935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert * @return The match/value Result. 1917935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert * @stable ICU 4.8 1927935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert */ 1937935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert public Result current() /*const*/ { 1947935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert int pos=pos_; 1957935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert if(pos<0) { 1967935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert return Result.NO_MATCH; 1977935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert } else { 1987935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert int node; 1997935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert return (remainingMatchLength_<0 && (node=bytes_[pos]&0xff)>=kMinValueLead) ? 2007935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert valueResults_[node&kValueIsFinal] : Result.NO_VALUE; 2017935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert } 2027935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert } 2037935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert 2047935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert /** 2057935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert * Traverses the trie from the initial state for this input byte. 2067935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert * Equivalent to reset().next(inByte). 2077935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert * @param inByte Input byte value. Values -0x100..-1 are treated like 0..0xff. 2087935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert * Values below -0x100 and above 0xff will never match. 2097935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert * @return The match/value Result. 2107935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert * @stable ICU 4.8 2117935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert */ 2127935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert public Result first(int inByte) { 2137935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert remainingMatchLength_=-1; 2147935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert if(inByte<0) { 2157935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert inByte+=0x100; 2167935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert } 2177935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert return nextImpl(root_, inByte); 2187935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert } 2197935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert 2207935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert /** 2217935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert * Traverses the trie from the current state for this input byte. 2227935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert * @param inByte Input byte value. Values -0x100..-1 are treated like 0..0xff. 2237935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert * Values below -0x100 and above 0xff will never match. 2247935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert * @return The match/value Result. 2257935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert * @stable ICU 4.8 2267935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert */ 2277935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert public Result next(int inByte) { 2287935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert int pos=pos_; 2297935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert if(pos<0) { 2307935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert return Result.NO_MATCH; 2317935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert } 2327935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert if(inByte<0) { 2337935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert inByte+=0x100; 2347935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert } 2357935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert int length=remainingMatchLength_; // Actual remaining match length minus 1. 2367935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert if(length>=0) { 2377935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert // Remaining part of a linear-match node. 2387935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert if(inByte==(bytes_[pos++]&0xff)) { 2397935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert remainingMatchLength_=--length; 2407935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert pos_=pos; 2417935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert int node; 2427935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert return (length<0 && (node=bytes_[pos]&0xff)>=kMinValueLead) ? 2437935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert valueResults_[node&kValueIsFinal] : Result.NO_VALUE; 2447935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert } else { 2457935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert stop(); 2467935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert return Result.NO_MATCH; 2477935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert } 2487935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert } 2497935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert return nextImpl(pos, inByte); 2507935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert } 2517935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert 2527935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert /** 2537935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert * Traverses the trie from the current state for this byte sequence. 2547935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert * Equivalent to 2557935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert * <pre> 2567935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert * Result result=current(); 2577935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert * for(each c in s) 2587935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert * if(!result.hasNext()) return Result.NO_MATCH; 2597935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert * result=next(c); 2607935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert * return result; 2617935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert * </pre> 2627935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert * @param s Contains a string or byte sequence. 2637935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert * @param sIndex The start index of the byte sequence in s. 2647935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert * @param sLimit The (exclusive) end index of the byte sequence in s. 2657935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert * @return The match/value Result. 2667935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert * @stable ICU 4.8 2677935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert */ 2687935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert public Result next(byte[] s, int sIndex, int sLimit) { 2697935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert if(sIndex>=sLimit) { 2707935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert // Empty input. 2717935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert return current(); 2727935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert } 2737935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert int pos=pos_; 2747935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert if(pos<0) { 2757935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert return Result.NO_MATCH; 2767935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert } 2777935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert int length=remainingMatchLength_; // Actual remaining match length minus 1. 2787935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert for(;;) { 2797935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert // Fetch the next input byte, if there is one. 2807935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert // Continue a linear-match node. 2817935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert byte inByte; 2827935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert for(;;) { 2837935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert if(sIndex==sLimit) { 2847935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert remainingMatchLength_=length; 2857935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert pos_=pos; 2867935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert int node; 2877935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert return (length<0 && (node=(bytes_[pos]&0xff))>=kMinValueLead) ? 2887935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert valueResults_[node&kValueIsFinal] : Result.NO_VALUE; 2897935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert } 2907935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert inByte=s[sIndex++]; 2917935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert if(length<0) { 2927935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert remainingMatchLength_=length; 2937935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert break; 2947935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert } 2957935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert if(inByte!=bytes_[pos]) { 2967935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert stop(); 2977935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert return Result.NO_MATCH; 2987935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert } 2997935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert ++pos; 3007935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert --length; 3017935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert } 3027935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert for(;;) { 3037935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert int node=bytes_[pos++]&0xff; 3047935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert if(node<kMinLinearMatch) { 3057935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert Result result=branchNext(pos, node, inByte&0xff); 3067935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert if(result==Result.NO_MATCH) { 3077935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert return Result.NO_MATCH; 3087935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert } 3097935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert // Fetch the next input byte, if there is one. 3107935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert if(sIndex==sLimit) { 3117935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert return result; 3127935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert } 3137935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert if(result==Result.FINAL_VALUE) { 3147935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert // No further matching bytes. 3157935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert stop(); 3167935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert return Result.NO_MATCH; 3177935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert } 3187935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert inByte=s[sIndex++]; 3197935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert pos=pos_; // branchNext() advanced pos and wrote it to pos_ . 3207935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert } else if(node<kMinValueLead) { 3217935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert // Match length+1 bytes. 3227935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert length=node-kMinLinearMatch; // Actual match length minus 1. 3237935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert if(inByte!=bytes_[pos]) { 3247935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert stop(); 3257935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert return Result.NO_MATCH; 3267935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert } 3277935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert ++pos; 3287935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert --length; 3297935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert break; 3307935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert } else if((node&kValueIsFinal)!=0) { 3317935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert // No further matching bytes. 3327935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert stop(); 3337935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert return Result.NO_MATCH; 3347935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert } else { 3357935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert // Skip intermediate value. 3367935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert pos=skipValue(pos, node); 3377935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert // The next node must not also be a value node. 3387935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert assert((bytes_[pos]&0xff)<kMinValueLead); 3397935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert } 3407935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert } 3417935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert } 3427935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert } 3437935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert 3447935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert /** 3457935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert * Returns a matching byte sequence's value if called immediately after 3467935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert * current()/first()/next() returned Result.INTERMEDIATE_VALUE or Result.FINAL_VALUE. 3477935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert * getValue() can be called multiple times. 3487935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert * 3497935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert * Do not call getValue() after Result.NO_MATCH or Result.NO_VALUE! 3507935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert * @return The value for the byte sequence so far. 3517935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert * @stable ICU 4.8 3527935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert */ 3537935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert public int getValue() /*const*/ { 3547935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert int pos=pos_; 3557935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert int leadByte=bytes_[pos++]&0xff; 3567935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert assert(leadByte>=kMinValueLead); 3577935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert return readValue(bytes_, pos, leadByte>>1); 3587935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert } 3597935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert 3607935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert /** 3617935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert * Determines whether all byte sequences reachable from the current state 3627935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert * map to the same value, and if so, returns that value. 3637935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert * @return The unique value in bits 32..1 with bit 0 set, 3647935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert * if all byte sequences reachable from the current state 3657935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert * map to the same value; otherwise returns 0. 3667935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert * @stable ICU 4.8 3677935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert */ 3687935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert public long getUniqueValue() /*const*/ { 3697935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert int pos=pos_; 3707935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert if(pos<0) { 3717935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert return 0; 3727935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert } 3737935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert // Skip the rest of a pending linear-match node. 3747935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert long uniqueValue=findUniqueValue(bytes_, pos+remainingMatchLength_+1, 0); 3757935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert // Ignore internally used bits 63..33; extend the actual value's sign bit from bit 32. 3767935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert return (uniqueValue<<31)>>31; 3777935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert } 3787935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert 3797935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert /** 3807935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert * Finds each byte which continues the byte sequence from the current state. 3817935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert * That is, each byte b for which it would be next(b)!=Result.NO_MATCH now. 3827935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert * @param out Each next byte is 0-extended to a char and appended to this object. 3837935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert * (Only uses the out.append(c) method.) 3847935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert * @return The number of bytes which continue the byte sequence from here. 3857935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert * @stable ICU 4.8 3867935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert */ 3877935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert public int getNextBytes(Appendable out) /*const*/ { 3887935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert int pos=pos_; 3897935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert if(pos<0) { 3907935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert return 0; 3917935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert } 3927935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert if(remainingMatchLength_>=0) { 3937935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert append(out, bytes_[pos]&0xff); // Next byte of a pending linear-match node. 3947935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert return 1; 3957935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert } 3967935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert int node=bytes_[pos++]&0xff; 3977935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert if(node>=kMinValueLead) { 3987935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert if((node&kValueIsFinal)!=0) { 3997935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert return 0; 4007935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert } else { 4017935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert pos=skipValue(pos, node); 4027935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert node=bytes_[pos++]&0xff; 4037935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert assert(node<kMinValueLead); 4047935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert } 4057935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert } 4067935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert if(node<kMinLinearMatch) { 4077935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert if(node==0) { 4087935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert node=bytes_[pos++]&0xff; 4097935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert } 4107935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert getNextBranchBytes(bytes_, pos, ++node, out); 4117935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert return node; 4127935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert } else { 4137935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert // First byte of the linear-match node. 4147935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert append(out, bytes_[pos]&0xff); 4157935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert return 1; 4167935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert } 4177935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert } 4187935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert 4197935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert /** 4207935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert * Iterates from the current state of this trie. 4217935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert * @return A new BytesTrie.Iterator. 4227935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert * @stable ICU 4.8 4237935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert */ 4247935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert public Iterator iterator() { 4257935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert return new Iterator(bytes_, pos_, remainingMatchLength_, 0); 4267935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert } 4277935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert 4287935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert /** 4297935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert * Iterates from the current state of this trie. 4307935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert * @param maxStringLength If 0, the iterator returns full strings/byte sequences. 4317935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert * Otherwise, the iterator returns strings with this maximum length. 4327935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert * @return A new BytesTrie.Iterator. 4337935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert * @stable ICU 4.8 4347935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert */ 4357935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert public Iterator iterator(int maxStringLength) { 4367935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert return new Iterator(bytes_, pos_, remainingMatchLength_, maxStringLength); 4377935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert } 4387935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert 4397935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert /** 4407935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert * Iterates from the root of a byte-serialized BytesTrie. 4417935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert * @param trieBytes Bytes array that contains the serialized trie. 4427935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert * @param offset Root offset of the trie in the array. 4437935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert * @param maxStringLength If 0, the iterator returns full strings/byte sequences. 4447935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert * Otherwise, the iterator returns strings with this maximum length. 4457935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert * @return A new BytesTrie.Iterator. 4467935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert * @stable ICU 4.8 4477935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert */ 4487935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert public static Iterator iterator(byte[] trieBytes, int offset, int maxStringLength) { 4497935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert return new Iterator(trieBytes, offset, -1, maxStringLength); 4507935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert } 4517935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert 4527935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert /** 4537935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert * Return value type for the Iterator. 4547935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert * @stable ICU 4.8 4557935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert */ 4567935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert public static final class Entry { 4577935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert private Entry(int capacity) { 4587935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert bytes=new byte[capacity]; 4597935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert } 4607935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert 4617935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert /** 4627935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert * @return The length of the byte sequence. 4637935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert * @stable ICU 4.8 4647935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert */ 4657935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert public int bytesLength() { return length; } 4667935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert /** 4677935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert * Returns a byte of the byte sequence. 4687935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert * @param index An index into the byte sequence. 4697935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert * @return The index-th byte sequence byte. 4707935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert * @stable ICU 4.8 4717935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert */ 4727935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert public byte byteAt(int index) { return bytes[index]; } 4737935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert /** 4747935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert * Copies the byte sequence into a byte array. 4757935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert * @param dest Destination byte array. 4767935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert * @param destOffset Starting offset to where in dest the byte sequence is copied. 4777935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert * @stable ICU 4.8 4787935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert */ 4797935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert public void copyBytesTo(byte[] dest, int destOffset) { 4807935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert System.arraycopy(bytes, 0, dest, destOffset, length); 4817935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert } 4827935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert /** 4837935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert * @return The byte sequence as a read-only ByteBuffer. 4847935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert * @stable ICU 4.8 4857935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert */ 4867935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert public ByteBuffer bytesAsByteBuffer() { 4877935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert return ByteBuffer.wrap(bytes, 0, length).asReadOnlyBuffer(); 4887935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert } 4897935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert 4907935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert /** 4917935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert * The value associated with the byte sequence. 4927935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert * @stable ICU 4.8 4937935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert */ 4947935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert public int value; 4957935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert 4967935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert private void ensureCapacity(int len) { 4977935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert if(bytes.length<len) { 4987935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert byte[] newBytes=new byte[Math.min(2*bytes.length, 2*len)]; 4997935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert System.arraycopy(bytes, 0, newBytes, 0, length); 5007935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert bytes=newBytes; 5017935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert } 5027935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert } 5037935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert private void append(byte b) { 5047935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert ensureCapacity(length+1); 5057935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert bytes[length++]=b; 5067935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert } 5077935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert private void append(byte[] b, int off, int len) { 5087935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert ensureCapacity(length+len); 5097935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert System.arraycopy(b, off, bytes, length, len); 5107935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert length+=len; 5117935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert } 5127935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert private void truncateString(int newLength) { length=newLength; } 5137935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert 5147935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert private byte[] bytes; 5157935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert private int length; 5167935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert } 5177935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert 5187935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert /** 5197935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert * Iterator for all of the (byte sequence, value) pairs in a BytesTrie. 5207935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert * @stable ICU 4.8 5217935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert */ 5227935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert public static final class Iterator implements java.util.Iterator<Entry> { 5237935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert private Iterator(byte[] trieBytes, int offset, int remainingMatchLength, int maxStringLength) { 5247935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert bytes_=trieBytes; 5257935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert pos_=initialPos_=offset; 5267935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert remainingMatchLength_=initialRemainingMatchLength_=remainingMatchLength; 5277935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert maxLength_=maxStringLength; 5287935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert entry_=new Entry(maxLength_!=0 ? maxLength_ : 32); 5297935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert int length=remainingMatchLength_; // Actual remaining match length minus 1. 5307935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert if(length>=0) { 5317935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert // Pending linear-match node, append remaining bytes to entry_. 5327935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert ++length; 5337935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert if(maxLength_>0 && length>maxLength_) { 5347935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert length=maxLength_; // This will leave remainingMatchLength>=0 as a signal. 5357935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert } 5367935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert entry_.append(bytes_, pos_, length); 5377935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert pos_+=length; 5387935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert remainingMatchLength_-=length; 5397935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert } 5407935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert } 5417935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert 5427935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert /** 5437935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert * Resets this iterator to its initial state. 5447935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert * @return this 5457935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert * @stable ICU 4.8 5467935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert */ 5477935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert public Iterator reset() { 5487935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert pos_=initialPos_; 5497935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert remainingMatchLength_=initialRemainingMatchLength_; 5507935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert int length=remainingMatchLength_+1; // Remaining match length. 5517935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert if(maxLength_>0 && length>maxLength_) { 5527935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert length=maxLength_; 5537935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert } 5547935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert entry_.truncateString(length); 5557935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert pos_+=length; 5567935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert remainingMatchLength_-=length; 5577935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert stack_.clear(); 5587935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert return this; 5597935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert } 5607935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert 5617935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert /** 5627935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert * @return true if there are more elements. 5637935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert * @stable ICU 4.8 5647935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert */ 5657935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert public boolean hasNext() /*const*/ { return pos_>=0 || !stack_.isEmpty(); } 5667935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert 5677935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert /** 5687935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert * Finds the next (byte sequence, value) pair if there is one. 5697935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert * 5707935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert * If the byte sequence is truncated to the maximum length and does not 5717935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert * have a real value, then the value is set to -1. 5727935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert * In this case, this "not a real value" is indistinguishable from 5737935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert * a real value of -1. 5747935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert * @return An Entry with the string and value of the next element. 5757935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert * @throws NoSuchElementException - iteration has no more elements. 5767935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert * @stable ICU 4.8 5777935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert */ 5787935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert public Entry next() { 5797935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert int pos=pos_; 5807935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert if(pos<0) { 5817935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert if(stack_.isEmpty()) { 5827935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert throw new NoSuchElementException(); 5837935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert } 5847935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert // Pop the state off the stack and continue with the next outbound edge of 5857935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert // the branch node. 5867935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert long top=stack_.remove(stack_.size()-1); 5877935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert int length=(int)top; 5887935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert pos=(int)(top>>32); 5897935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert entry_.truncateString(length&0xffff); 5907935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert length>>>=16; 5917935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert if(length>1) { 5927935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert pos=branchNext(pos, length); 5937935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert if(pos<0) { 5947935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert return entry_; // Reached a final value. 5957935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert } 5967935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert } else { 5977935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert entry_.append(bytes_[pos++]); 5987935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert } 5997935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert } 6007935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert if(remainingMatchLength_>=0) { 6017935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert // We only get here if we started in a pending linear-match node 6027935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert // with more than maxLength remaining bytes. 6037935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert return truncateAndStop(); 6047935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert } 6057935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert for(;;) { 6067935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert int node=bytes_[pos++]&0xff; 6077935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert if(node>=kMinValueLead) { 6087935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert // Deliver value for the byte sequence so far. 6097935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert boolean isFinal=(node&kValueIsFinal)!=0; 6107935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert entry_.value=readValue(bytes_, pos, node>>1); 6117935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert if(isFinal || (maxLength_>0 && entry_.length==maxLength_)) { 6127935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert pos_=-1; 6137935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert } else { 6147935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert pos_=skipValue(pos, node); 6157935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert } 6167935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert return entry_; 6177935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert } 6187935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert if(maxLength_>0 && entry_.length==maxLength_) { 6197935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert return truncateAndStop(); 6207935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert } 6217935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert if(node<kMinLinearMatch) { 6227935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert if(node==0) { 6237935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert node=bytes_[pos++]&0xff; 6247935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert } 6257935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert pos=branchNext(pos, node+1); 6267935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert if(pos<0) { 6277935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert return entry_; // Reached a final value. 6287935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert } 6297935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert } else { 6307935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert // Linear-match node, append length bytes to entry_. 6317935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert int length=node-kMinLinearMatch+1; 6327935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert if(maxLength_>0 && entry_.length+length>maxLength_) { 6337935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert entry_.append(bytes_, pos, maxLength_-entry_.length); 6347935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert return truncateAndStop(); 6357935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert } 6367935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert entry_.append(bytes_, pos, length); 6377935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert pos+=length; 6387935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert } 6397935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert } 6407935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert } 6417935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert 6427935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert /** 6437935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert * Iterator.remove() is not supported. 6447935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert * @throws UnsupportedOperationException (always) 6457935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert * @stable ICU 4.8 6467935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert */ 6477935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert public void remove() { 6487935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert throw new UnsupportedOperationException(); 6497935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert } 6507935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert 6517935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert private Entry truncateAndStop() { 6527935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert pos_=-1; 6537935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert entry_.value=-1; // no real value for str 6547935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert return entry_; 6557935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert } 6567935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert 6577935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert private int branchNext(int pos, int length) { 6587935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert while(length>kMaxBranchLinearSubNodeLength) { 6597935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert ++pos; // ignore the comparison byte 6607935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert // Push state for the greater-or-equal edge. 6617935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert stack_.add(((long)skipDelta(bytes_, pos)<<32)|((length-(length>>1))<<16)|entry_.length); 6627935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert // Follow the less-than edge. 6637935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert length>>=1; 6647935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert pos=jumpByDelta(bytes_, pos); 6657935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert } 6667935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert // List of key-value pairs where values are either final values or jump deltas. 6677935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert // Read the first (key, value) pair. 6687935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert byte trieByte=bytes_[pos++]; 6697935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert int node=bytes_[pos++]&0xff; 6707935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert boolean isFinal=(node&kValueIsFinal)!=0; 6717935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert int value=readValue(bytes_, pos, node>>1); 6727935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert pos=skipValue(pos, node); 6737935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert stack_.add(((long)pos<<32)|((length-1)<<16)|entry_.length); 6747935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert entry_.append(trieByte); 6757935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert if(isFinal) { 6767935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert pos_=-1; 6777935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert entry_.value=value; 6787935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert return -1; 6797935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert } else { 6807935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert return pos+value; 6817935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert } 6827935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert } 6837935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert 6847935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert private byte[] bytes_; 6857935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert private int pos_; 6867935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert private int initialPos_; 6877935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert private int remainingMatchLength_; 6887935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert private int initialRemainingMatchLength_; 6897935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert 6907935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert private int maxLength_; 6917935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert private Entry entry_; 6927935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert 6937935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert // The stack stores longs for backtracking to another 6947935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert // outbound edge of a branch node. 6957935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert // Each long has the offset from bytes_ in bits 62..32, 6967935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert // the entry_.stringLength() from before the node in bits 15..0, 6977935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert // and the remaining branch length in bits 24..16. (Bits 31..25 are unused.) 6987935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert // (We could store the remaining branch length minus 1 in bits 23..16 and not use bits 31..24, 6997935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert // but the code looks more confusing that way.) 7007935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert private ArrayList<Long> stack_=new ArrayList<Long>(); 7017935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert } 7027935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert 7037935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert private void stop() { 7047935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert pos_=-1; 7057935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert } 7067935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert 7077935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert // Reads a compact 32-bit integer. 7087935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert // pos is already after the leadByte, and the lead byte is already shifted right by 1. 7097935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert private static int readValue(byte[] bytes, int pos, int leadByte) { 7107935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert int value; 7117935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert if(leadByte<kMinTwoByteValueLead) { 7127935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert value=leadByte-kMinOneByteValueLead; 7137935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert } else if(leadByte<kMinThreeByteValueLead) { 7147935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert value=((leadByte-kMinTwoByteValueLead)<<8)|(bytes[pos]&0xff); 7157935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert } else if(leadByte<kFourByteValueLead) { 7167935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert value=((leadByte-kMinThreeByteValueLead)<<16)|((bytes[pos]&0xff)<<8)|(bytes[pos+1]&0xff); 7177935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert } else if(leadByte==kFourByteValueLead) { 7187935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert value=((bytes[pos]&0xff)<<16)|((bytes[pos+1]&0xff)<<8)|(bytes[pos+2]&0xff); 7197935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert } else { 7207935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert value=(bytes[pos]<<24)|((bytes[pos+1]&0xff)<<16)|((bytes[pos+2]&0xff)<<8)|(bytes[pos+3]&0xff); 7217935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert } 7227935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert return value; 7237935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert } 7247935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert private static int skipValue(int pos, int leadByte) { 7257935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert assert(leadByte>=kMinValueLead); 7267935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert if(leadByte>=(kMinTwoByteValueLead<<1)) { 7277935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert if(leadByte<(kMinThreeByteValueLead<<1)) { 7287935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert ++pos; 7297935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert } else if(leadByte<(kFourByteValueLead<<1)) { 7307935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert pos+=2; 7317935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert } else { 7327935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert pos+=3+((leadByte>>1)&1); 7337935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert } 7347935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert } 7357935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert return pos; 7367935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert } 7377935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert private static int skipValue(byte[] bytes, int pos) { 7387935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert int leadByte=bytes[pos++]&0xff; 7397935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert return skipValue(pos, leadByte); 7407935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert } 7417935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert 7427935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert // Reads a jump delta and jumps. 7437935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert private static int jumpByDelta(byte[] bytes, int pos) { 7447935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert int delta=bytes[pos++]&0xff; 7457935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert if(delta<kMinTwoByteDeltaLead) { 7467935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert // nothing to do 7477935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert } else if(delta<kMinThreeByteDeltaLead) { 7487935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert delta=((delta-kMinTwoByteDeltaLead)<<8)|(bytes[pos++]&0xff); 7497935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert } else if(delta<kFourByteDeltaLead) { 7507935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert delta=((delta-kMinThreeByteDeltaLead)<<16)|((bytes[pos]&0xff)<<8)|(bytes[pos+1]&0xff); 7517935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert pos+=2; 7527935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert } else if(delta==kFourByteDeltaLead) { 7537935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert delta=((bytes[pos]&0xff)<<16)|((bytes[pos+1]&0xff)<<8)|(bytes[pos+2]&0xff); 7547935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert pos+=3; 7557935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert } else { 7567935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert delta=(bytes[pos]<<24)|((bytes[pos+1]&0xff)<<16)|((bytes[pos+2]&0xff)<<8)|(bytes[pos+3]&0xff); 7577935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert pos+=4; 7587935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert } 7597935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert return pos+delta; 7607935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert } 7617935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert 7627935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert private static int skipDelta(byte[] bytes, int pos) { 7637935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert int delta=bytes[pos++]&0xff; 7647935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert if(delta>=kMinTwoByteDeltaLead) { 7657935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert if(delta<kMinThreeByteDeltaLead) { 7667935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert ++pos; 7677935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert } else if(delta<kFourByteDeltaLead) { 7687935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert pos+=2; 7697935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert } else { 7707935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert pos+=3+(delta&1); 7717935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert } 7727935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert } 7737935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert return pos; 7747935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert } 7757935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert 7767935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert private static Result[] valueResults_={ Result.INTERMEDIATE_VALUE, Result.FINAL_VALUE }; 7777935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert 7787935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert // Handles a branch node for both next(byte) and next(string). 7797935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert private Result branchNext(int pos, int length, int inByte) { 7807935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert // Branch according to the current byte. 7817935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert if(length==0) { 7827935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert length=bytes_[pos++]&0xff; 7837935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert } 7847935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert ++length; 7857935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert // The length of the branch is the number of bytes to select from. 7867935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert // The data structure encodes a binary search. 7877935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert while(length>kMaxBranchLinearSubNodeLength) { 7887935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert if(inByte<(bytes_[pos++]&0xff)) { 7897935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert length>>=1; 7907935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert pos=jumpByDelta(bytes_, pos); 7917935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert } else { 7927935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert length=length-(length>>1); 7937935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert pos=skipDelta(bytes_, pos); 7947935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert } 7957935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert } 7967935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert // Drop down to linear search for the last few bytes. 7977935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert // length>=2 because the loop body above sees length>kMaxBranchLinearSubNodeLength>=3 7987935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert // and divides length by 2. 7997935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert do { 8007935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert if(inByte==(bytes_[pos++]&0xff)) { 8017935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert Result result; 8027935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert int node=bytes_[pos]&0xff; 8037935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert assert(node>=kMinValueLead); 8047935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert if((node&kValueIsFinal)!=0) { 8057935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert // Leave the final value for getValue() to read. 8067935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert result=Result.FINAL_VALUE; 8077935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert } else { 8087935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert // Use the non-final value as the jump delta. 8097935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert ++pos; 8107935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert // int delta=readValue(pos, node>>1); 8117935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert node>>=1; 8127935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert int delta; 8137935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert if(node<kMinTwoByteValueLead) { 8147935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert delta=node-kMinOneByteValueLead; 8157935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert } else if(node<kMinThreeByteValueLead) { 8167935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert delta=((node-kMinTwoByteValueLead)<<8)|(bytes_[pos++]&0xff); 8177935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert } else if(node<kFourByteValueLead) { 8187935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert delta=((node-kMinThreeByteValueLead)<<16)|((bytes_[pos]&0xff)<<8)|(bytes_[pos+1]&0xff); 8197935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert pos+=2; 8207935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert } else if(node==kFourByteValueLead) { 8217935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert delta=((bytes_[pos]&0xff)<<16)|((bytes_[pos+1]&0xff)<<8)|(bytes_[pos+2]&0xff); 8227935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert pos+=3; 8237935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert } else { 8247935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert delta=(bytes_[pos]<<24)|((bytes_[pos+1]&0xff)<<16)|((bytes_[pos+2]&0xff)<<8)|(bytes_[pos+3]&0xff); 8257935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert pos+=4; 8267935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert } 8277935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert // end readValue() 8287935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert pos+=delta; 8297935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert node=bytes_[pos]&0xff; 8307935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert result= node>=kMinValueLead ? valueResults_[node&kValueIsFinal] : Result.NO_VALUE; 8317935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert } 8327935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert pos_=pos; 8337935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert return result; 8347935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert } 8357935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert --length; 8367935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert pos=skipValue(bytes_, pos); 8377935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert } while(length>1); 8387935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert if(inByte==(bytes_[pos++]&0xff)) { 8397935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert pos_=pos; 8407935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert int node=bytes_[pos]&0xff; 8417935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert return node>=kMinValueLead ? valueResults_[node&kValueIsFinal] : Result.NO_VALUE; 8427935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert } else { 8437935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert stop(); 8447935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert return Result.NO_MATCH; 8457935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert } 8467935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert } 8477935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert 8487935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert // Requires remainingLength_<0. 8497935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert private Result nextImpl(int pos, int inByte) { 8507935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert for(;;) { 8517935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert int node=bytes_[pos++]&0xff; 8527935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert if(node<kMinLinearMatch) { 8537935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert return branchNext(pos, node, inByte); 8547935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert } else if(node<kMinValueLead) { 8557935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert // Match the first of length+1 bytes. 8567935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert int length=node-kMinLinearMatch; // Actual match length minus 1. 8577935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert if(inByte==(bytes_[pos++]&0xff)) { 8587935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert remainingMatchLength_=--length; 8597935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert pos_=pos; 8607935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert return (length<0 && (node=bytes_[pos]&0xff)>=kMinValueLead) ? 8617935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert valueResults_[node&kValueIsFinal] : Result.NO_VALUE; 8627935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert } else { 8637935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert // No match. 8647935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert break; 8657935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert } 8667935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert } else if((node&kValueIsFinal)!=0) { 8677935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert // No further matching bytes. 8687935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert break; 8697935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert } else { 8707935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert // Skip intermediate value. 8717935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert pos=skipValue(pos, node); 8727935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert // The next node must not also be a value node. 8737935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert assert((bytes_[pos]&0xff)<kMinValueLead); 8747935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert } 8757935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert } 8767935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert stop(); 8777935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert return Result.NO_MATCH; 8787935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert } 8797935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert 8807935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert // Helper functions for getUniqueValue(). 8817935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert // Recursively finds a unique value (or whether there is not a unique one) 8827935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert // from a branch. 8837935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert // uniqueValue: On input, same as for getUniqueValue()/findUniqueValue(). 8847935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert // On return, if not 0, then bits 63..33 contain the updated non-negative pos. 8857935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert private static long findUniqueValueFromBranch(byte[] bytes, int pos, int length, 8867935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert long uniqueValue) { 8877935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert while(length>kMaxBranchLinearSubNodeLength) { 8887935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert ++pos; // ignore the comparison byte 8897935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert uniqueValue=findUniqueValueFromBranch(bytes, jumpByDelta(bytes, pos), length>>1, uniqueValue); 8907935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert if(uniqueValue==0) { 8917935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert return 0; 8927935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert } 8937935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert length=length-(length>>1); 8947935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert pos=skipDelta(bytes, pos); 8957935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert } 8967935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert do { 8977935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert ++pos; // ignore a comparison byte 8987935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert // handle its value 8997935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert int node=bytes[pos++]&0xff; 9007935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert boolean isFinal=(node&kValueIsFinal)!=0; 9017935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert int value=readValue(bytes, pos, node>>1); 9027935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert pos=skipValue(pos, node); 9037935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert if(isFinal) { 9047935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert if(uniqueValue!=0) { 9057935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert if(value!=(int)(uniqueValue>>1)) { 9067935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert return 0; 9077935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert } 9087935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert } else { 9097935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert uniqueValue=((long)value<<1)|1; 9107935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert } 9117935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert } else { 9127935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert uniqueValue=findUniqueValue(bytes, pos+value, uniqueValue); 9137935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert if(uniqueValue==0) { 9147935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert return 0; 9157935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert } 9167935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert } 9177935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert } while(--length>1); 9187935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert // ignore the last comparison byte 9197935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert return ((long)(pos+1)<<33)|(uniqueValue&0x1ffffffffL); 9207935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert } 9217935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert // Recursively finds a unique value (or whether there is not a unique one) 9227935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert // starting from a position on a node lead byte. 9237935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert // uniqueValue: If there is one, then bits 32..1 contain the value and bit 0 is set. 9247935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert // Otherwise, uniqueValue is 0. Bits 63..33 are ignored. 9257935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert private static long findUniqueValue(byte[] bytes, int pos, long uniqueValue) { 9267935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert for(;;) { 9277935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert int node=bytes[pos++]&0xff; 9287935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert if(node<kMinLinearMatch) { 9297935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert if(node==0) { 9307935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert node=bytes[pos++]&0xff; 9317935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert } 9327935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert uniqueValue=findUniqueValueFromBranch(bytes, pos, node+1, uniqueValue); 9337935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert if(uniqueValue==0) { 9347935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert return 0; 9357935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert } 9367935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert pos=(int)(uniqueValue>>>33); 9377935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert } else if(node<kMinValueLead) { 9387935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert // linear-match node 9397935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert pos+=node-kMinLinearMatch+1; // Ignore the match bytes. 9407935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert } else { 9417935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert boolean isFinal=(node&kValueIsFinal)!=0; 9427935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert int value=readValue(bytes, pos, node>>1); 9437935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert if(uniqueValue!=0) { 9447935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert if(value!=(int)(uniqueValue>>1)) { 9457935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert return 0; 9467935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert } 9477935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert } else { 9487935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert uniqueValue=((long)value<<1)|1; 9497935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert } 9507935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert if(isFinal) { 9517935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert return uniqueValue; 9527935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert } 9537935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert pos=skipValue(pos, node); 9547935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert } 9557935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert } 9567935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert } 9577935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert 9587935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert // Helper functions for getNextBytes(). 9597935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert // getNextBytes() when pos is on a branch node. 9607935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert private static void getNextBranchBytes(byte[] bytes, int pos, int length, Appendable out) { 9617935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert while(length>kMaxBranchLinearSubNodeLength) { 9627935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert ++pos; // ignore the comparison byte 9637935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert getNextBranchBytes(bytes, jumpByDelta(bytes, pos), length>>1, out); 9647935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert length=length-(length>>1); 9657935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert pos=skipDelta(bytes, pos); 9667935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert } 9677935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert do { 9687935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert append(out, bytes[pos++]&0xff); 9697935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert pos=skipValue(bytes, pos); 9707935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert } while(--length>1); 9717935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert append(out, bytes[pos]&0xff); 9727935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert } 9737935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert private static void append(Appendable out, int c) { 9747935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert try { 9757935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert out.append((char)c); 9767935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert } catch(IOException e) { 9777935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert throw new ICUUncheckedIOException(e); 9787935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert } 9797935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert } 9807935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert 9817935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert // BytesTrie data structure 9827935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert // 9837935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert // The trie consists of a series of byte-serialized nodes for incremental 9847935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert // string/byte sequence matching. The root node is at the beginning of the trie data. 9857935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert // 9867935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert // Types of nodes are distinguished by their node lead byte ranges. 9877935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert // After each node, except a final-value node, another node follows to 9887935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert // encode match values or continue matching further bytes. 9897935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert // 9907935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert // Node types: 9917935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert // - Value node: Stores a 32-bit integer in a compact, variable-length format. 9927935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert // The value is for the string/byte sequence so far. 9937935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert // One node bit indicates whether the value is final or whether 9947935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert // matching continues with the next node. 9957935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert // - Linear-match node: Matches a number of bytes. 9967935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert // - Branch node: Branches to other nodes according to the current input byte. 9977935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert // The node byte is the length of the branch (number of bytes to select from) 9987935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert // minus 1. It is followed by a sub-node: 9997935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert // - If the length is at most kMaxBranchLinearSubNodeLength, then 10007935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert // there are length-1 (key, value) pairs and then one more comparison byte. 10017935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert // If one of the key bytes matches, then the value is either a final value for 10027935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert // the string/byte sequence so far, or a "jump" delta to the next node. 10037935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert // If the last byte matches, then matching continues with the next node. 10047935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert // (Values have the same encoding as value nodes.) 10057935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert // - If the length is greater than kMaxBranchLinearSubNodeLength, then 10067935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert // there is one byte and one "jump" delta. 10077935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert // If the input byte is less than the sub-node byte, then "jump" by delta to 10087935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert // the next sub-node which will have a length of length/2. 10097935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert // (The delta has its own compact encoding.) 10107935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert // Otherwise, skip the "jump" delta to the next sub-node 10117935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert // which will have a length of length-length/2. 10127935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert 10137935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert // Node lead byte values. 10147935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert 10157935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert // 00..0f: Branch node. If node!=0 then the length is node+1, otherwise 10167935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert // the length is one more than the next byte. 10177935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert 10187935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert // For a branch sub-node with at most this many entries, we drop down 10197935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert // to a linear search. 10207935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert /*package*/ static final int kMaxBranchLinearSubNodeLength=5; 10217935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert 10227935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert // 10..1f: Linear-match node, match 1..16 bytes and continue reading the next node. 10237935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert /*package*/ static final int kMinLinearMatch=0x10; 10247935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert /*package*/ static final int kMaxLinearMatchLength=0x10; 10257935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert 10267935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert // 20..ff: Variable-length value node. 10277935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert // If odd, the value is final. (Otherwise, intermediate value or jump delta.) 10287935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert // Then shift-right by 1 bit. 10297935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert // The remaining lead byte value indicates the number of following bytes (0..4) 10307935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert // and contains the value's top bits. 10317935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert /*package*/ static final int kMinValueLead=kMinLinearMatch+kMaxLinearMatchLength; // 0x20 10327935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert // It is a final value if bit 0 is set. 10337935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert private static final int kValueIsFinal=1; 10347935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert 10357935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert // Compact value: After testing bit 0, shift right by 1 and then use the following thresholds. 10367935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert /*package*/ static final int kMinOneByteValueLead=kMinValueLead/2; // 0x10 10377935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert /*package*/ static final int kMaxOneByteValue=0x40; // At least 6 bits in the first byte. 10387935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert 10397935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert /*package*/ static final int kMinTwoByteValueLead=kMinOneByteValueLead+kMaxOneByteValue+1; // 0x51 10407935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert /*package*/ static final int kMaxTwoByteValue=0x1aff; 10417935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert 10427935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert /*package*/ static final int kMinThreeByteValueLead=kMinTwoByteValueLead+(kMaxTwoByteValue>>8)+1; // 0x6c 10437935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert /*package*/ static final int kFourByteValueLead=0x7e; 10447935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert 10457935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert // A little more than Unicode code points. (0x11ffff) 10467935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert /*package*/ static final int kMaxThreeByteValue=((kFourByteValueLead-kMinThreeByteValueLead)<<16)-1; 10477935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert 10487935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert /*package*/ static final int kFiveByteValueLead=0x7f; 10497935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert 10507935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert // Compact delta integers. 10517935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert /*package*/ static final int kMaxOneByteDelta=0xbf; 10527935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert /*package*/ static final int kMinTwoByteDeltaLead=kMaxOneByteDelta+1; // 0xc0 10537935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert /*package*/ static final int kMinThreeByteDeltaLead=0xf0; 10547935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert /*package*/ static final int kFourByteDeltaLead=0xfe; 10557935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert /*package*/ static final int kFiveByteDeltaLead=0xff; 10567935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert 10577935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert /*package*/ static final int kMaxTwoByteDelta=((kMinThreeByteDeltaLead-kMinTwoByteDeltaLead)<<8)-1; // 0x2fff 10587935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert /*package*/ static final int kMaxThreeByteDelta=((kFourByteDeltaLead-kMinThreeByteDeltaLead)<<16)-1; // 0xdffff 10597935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert 10607935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert // Fixed value referencing the BytesTrie bytes. 10617935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert private byte[] bytes_; 10627935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert private int root_; 10637935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert 10647935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert // Iterator variables. 10657935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert 10667935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert // Index of next trie byte to read. Negative if no more matches. 10677935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert private int pos_; 10687935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert // Remaining length of a linear-match node, minus 1. Negative if not in such a node. 10697935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert private int remainingMatchLength_; 10707935b1839a081ed19ae0d33029ad3c09632a2caaFredrik Roubert}; 1071