BytesTrie.java revision d4d3fc6e0d919d96cdb282a05a05327cdf2c7f4e
1/* GENERATED SOURCE. DO NOT MODIFY. */
2/*
3*******************************************************************************
4*   Copyright (C) 2010-2014, International Business Machines
5*   Corporation and others.  All Rights Reserved.
6*******************************************************************************
7*   created on: 2010nov23
8*   created by: Markus W. Scherer
9*   ported from ICU4C bytestrie.h/.cpp
10*/
11package android.icu.util;
12
13import java.io.IOException;
14import java.nio.ByteBuffer;
15import java.util.ArrayList;
16import java.util.NoSuchElementException;
17
18/**
19 * Light-weight, non-const reader class for a BytesTrie.
20 * Traverses a byte-serialized data structure with minimal state,
21 * for mapping byte sequences to non-negative integer values.
22 *
23 * <p>This class is not intended for public subclassing.
24 *
25 * @author Markus W. Scherer
26 * @hide Only a subset of ICU is exposed in Android
27 */
28public final class BytesTrie implements Cloneable, Iterable<BytesTrie.Entry> {
29    /**
30     * Constructs a BytesTrie reader instance.
31     *
32     * <p>The array must contain a copy of a byte sequence from the BytesTrieBuilder,
33     * with the offset indicating the first byte of that sequence.
34     * The BytesTrie object will not read more bytes than
35     * the BytesTrieBuilder generated in the corresponding build() call.
36     *
37     * <p>The array is not copied/cloned and must not be modified while
38     * the BytesTrie object is in use.
39     *
40     * @param trieBytes Bytes array that contains the serialized trie.
41     * @param offset Root offset of the trie in the array.
42     */
43    public BytesTrie(byte[] trieBytes, int offset) {
44        bytes_=trieBytes;
45        pos_=root_=offset;
46        remainingMatchLength_=-1;
47    }
48
49    /**
50     * Clones this trie reader object and its state,
51     * but not the byte array which will be shared.
52     * @return A shallow clone of this trie.
53     */
54    @Override
55    public Object clone() throws CloneNotSupportedException {
56        return super.clone();  // A shallow copy is just what we need.
57    }
58
59    /**
60     * Resets this trie to its initial state.
61     * @return this
62     */
63    public BytesTrie reset() {
64        pos_=root_;
65        remainingMatchLength_=-1;
66        return this;
67    }
68
69    /**
70     * BytesTrie state object, for saving a trie's current state
71     * and resetting the trie back to this state later.
72     */
73    public static final class State {
74        /**
75         * Constructs an empty State.
76         */
77        public State() {}
78        private byte[] bytes;
79        private int root;
80        private int pos;
81        private int remainingMatchLength;
82    }
83
84    /**
85     * Saves the state of this trie.
86     * @param state The State object to hold the trie's state.
87     * @return this
88     * @see #resetToState
89     */
90    public BytesTrie saveState(State state) /*const*/ {
91        state.bytes=bytes_;
92        state.root=root_;
93        state.pos=pos_;
94        state.remainingMatchLength=remainingMatchLength_;
95        return this;
96    }
97
98    /**
99     * Resets this trie to the saved state.
100     * @param state The State object which holds a saved trie state.
101     * @return this
102     * @throws IllegalArgumentException if the state object contains no state,
103     *         or the state of a different trie
104     * @see #saveState
105     * @see #reset
106     */
107    public BytesTrie resetToState(State state) {
108        if(bytes_==state.bytes && bytes_!=null && root_==state.root) {
109            pos_=state.pos;
110            remainingMatchLength_=state.remainingMatchLength;
111        } else {
112            throw new IllegalArgumentException("incompatible trie state");
113        }
114        return this;
115    }
116
117    /**
118     * Return values for BytesTrie.next(), CharsTrie.next() and similar methods.
119     */
120    public enum Result {
121        /**
122         * The input unit(s) did not continue a matching string.
123         * Once current()/next() return NO_MATCH,
124         * all further calls to current()/next() will also return NO_MATCH,
125         * until the trie is reset to its original state or to a saved state.
126         */
127        NO_MATCH,
128        /**
129         * The input unit(s) continued a matching string
130         * but there is no value for the string so far.
131         * (It is a prefix of a longer string.)
132         */
133        NO_VALUE,
134        /**
135         * The input unit(s) continued a matching string
136         * and there is a value for the string so far.
137         * This value will be returned by getValue().
138         * No further input byte/unit can continue a matching string.
139         */
140        FINAL_VALUE,
141        /**
142         * The input unit(s) continued a matching string
143         * and there is a value for the string so far.
144         * This value will be returned by getValue().
145         * Another input byte/unit can continue a matching string.
146         */
147        INTERMEDIATE_VALUE;
148
149        // Note: The following methods assume the particular order
150        // of enum constants, treating the ordinal() values like bit sets.
151        // Do not reorder the enum constants!
152
153        /**
154         * Same as (result!=NO_MATCH).
155         * @return true if the input bytes/units so far are part of a matching string/byte sequence.
156         */
157        public boolean matches() { return this!=NO_MATCH; }
158
159        /**
160         * Equivalent to (result==INTERMEDIATE_VALUE || result==FINAL_VALUE).
161         * @return true if there is a value for the input bytes/units so far.
162         * @see #getValue
163         */
164        public boolean hasValue() { return ordinal()>=2; }
165
166        /**
167         * Equivalent to (result==NO_VALUE || result==INTERMEDIATE_VALUE).
168         * @return true if another input byte/unit can continue a matching string.
169         */
170        public boolean hasNext() { return (ordinal()&1)!=0; }
171    }
172
173    /**
174     * Determines whether the byte sequence so far matches, whether it has a value,
175     * and whether another input byte can continue a matching byte sequence.
176     * @return The match/value Result.
177     */
178    public Result current() /*const*/ {
179        int pos=pos_;
180        if(pos<0) {
181            return Result.NO_MATCH;
182        } else {
183            int node;
184            return (remainingMatchLength_<0 && (node=bytes_[pos]&0xff)>=kMinValueLead) ?
185                    valueResults_[node&kValueIsFinal] : Result.NO_VALUE;
186        }
187    }
188
189    /**
190     * Traverses the trie from the initial state for this input byte.
191     * Equivalent to reset().next(inByte).
192     * @param inByte Input byte value. Values -0x100..-1 are treated like 0..0xff.
193     *               Values below -0x100 and above 0xff will never match.
194     * @return The match/value Result.
195     */
196    public Result first(int inByte) {
197        remainingMatchLength_=-1;
198        if(inByte<0) {
199            inByte+=0x100;
200        }
201        return nextImpl(root_, inByte);
202    }
203
204    /**
205     * Traverses the trie from the current state for this input byte.
206     * @param inByte Input byte value. Values -0x100..-1 are treated like 0..0xff.
207     *               Values below -0x100 and above 0xff will never match.
208     * @return The match/value Result.
209     */
210    public Result next(int inByte) {
211        int pos=pos_;
212        if(pos<0) {
213            return Result.NO_MATCH;
214        }
215        if(inByte<0) {
216            inByte+=0x100;
217        }
218        int length=remainingMatchLength_;  // Actual remaining match length minus 1.
219        if(length>=0) {
220            // Remaining part of a linear-match node.
221            if(inByte==(bytes_[pos++]&0xff)) {
222                remainingMatchLength_=--length;
223                pos_=pos;
224                int node;
225                return (length<0 && (node=bytes_[pos]&0xff)>=kMinValueLead) ?
226                        valueResults_[node&kValueIsFinal] : Result.NO_VALUE;
227            } else {
228                stop();
229                return Result.NO_MATCH;
230            }
231        }
232        return nextImpl(pos, inByte);
233    }
234
235    /**
236     * Traverses the trie from the current state for this byte sequence.
237     * Equivalent to
238     * <pre>
239     * Result result=current();
240     * for(each c in s)
241     *   if(!result.hasNext()) return Result.NO_MATCH;
242     *   result=next(c);
243     * return result;
244     * </pre>
245     * @param s Contains a string or byte sequence.
246     * @param sIndex The start index of the byte sequence in s.
247     * @param sLimit The (exclusive) end index of the byte sequence in s.
248     * @return The match/value Result.
249     */
250    public Result next(byte[] s, int sIndex, int sLimit) {
251        if(sIndex>=sLimit) {
252            // Empty input.
253            return current();
254        }
255        int pos=pos_;
256        if(pos<0) {
257            return Result.NO_MATCH;
258        }
259        int length=remainingMatchLength_;  // Actual remaining match length minus 1.
260        for(;;) {
261            // Fetch the next input byte, if there is one.
262            // Continue a linear-match node.
263            byte inByte;
264            for(;;) {
265                if(sIndex==sLimit) {
266                    remainingMatchLength_=length;
267                    pos_=pos;
268                    int node;
269                    return (length<0 && (node=(bytes_[pos]&0xff))>=kMinValueLead) ?
270                            valueResults_[node&kValueIsFinal] : Result.NO_VALUE;
271                }
272                inByte=s[sIndex++];
273                if(length<0) {
274                    remainingMatchLength_=length;
275                    break;
276                }
277                if(inByte!=bytes_[pos]) {
278                    stop();
279                    return Result.NO_MATCH;
280                }
281                ++pos;
282                --length;
283            }
284            for(;;) {
285                int node=bytes_[pos++]&0xff;
286                if(node<kMinLinearMatch) {
287                    Result result=branchNext(pos, node, inByte&0xff);
288                    if(result==Result.NO_MATCH) {
289                        return Result.NO_MATCH;
290                    }
291                    // Fetch the next input byte, if there is one.
292                    if(sIndex==sLimit) {
293                        return result;
294                    }
295                    if(result==Result.FINAL_VALUE) {
296                        // No further matching bytes.
297                        stop();
298                        return Result.NO_MATCH;
299                    }
300                    inByte=s[sIndex++];
301                    pos=pos_;  // branchNext() advanced pos and wrote it to pos_ .
302                } else if(node<kMinValueLead) {
303                    // Match length+1 bytes.
304                    length=node-kMinLinearMatch;  // Actual match length minus 1.
305                    if(inByte!=bytes_[pos]) {
306                        stop();
307                        return Result.NO_MATCH;
308                    }
309                    ++pos;
310                    --length;
311                    break;
312                } else if((node&kValueIsFinal)!=0) {
313                    // No further matching bytes.
314                    stop();
315                    return Result.NO_MATCH;
316                } else {
317                    // Skip intermediate value.
318                    pos=skipValue(pos, node);
319                    // The next node must not also be a value node.
320                    assert((bytes_[pos]&0xff)<kMinValueLead);
321                }
322            }
323        }
324    }
325
326    /**
327     * Returns a matching byte sequence's value if called immediately after
328     * current()/first()/next() returned Result.INTERMEDIATE_VALUE or Result.FINAL_VALUE.
329     * getValue() can be called multiple times.
330     *
331     * Do not call getValue() after Result.NO_MATCH or Result.NO_VALUE!
332     * @return The value for the byte sequence so far.
333     */
334    public int getValue() /*const*/ {
335        int pos=pos_;
336        int leadByte=bytes_[pos++]&0xff;
337        assert(leadByte>=kMinValueLead);
338        return readValue(bytes_, pos, leadByte>>1);
339    }
340
341    /**
342     * Determines whether all byte sequences reachable from the current state
343     * map to the same value, and if so, returns that value.
344     * @return The unique value in bits 32..1 with bit 0 set,
345     *         if all byte sequences reachable from the current state
346     *         map to the same value; otherwise returns 0.
347     */
348    public long getUniqueValue() /*const*/ {
349        int pos=pos_;
350        if(pos<0) {
351            return 0;
352        }
353        // Skip the rest of a pending linear-match node.
354        long uniqueValue=findUniqueValue(bytes_, pos+remainingMatchLength_+1, 0);
355        // Ignore internally used bits 63..33; extend the actual value's sign bit from bit 32.
356        return (uniqueValue<<31)>>31;
357    }
358
359    /**
360     * Finds each byte which continues the byte sequence from the current state.
361     * That is, each byte b for which it would be next(b)!=Result.NO_MATCH now.
362     * @param out Each next byte is 0-extended to a char and appended to this object.
363     *            (Only uses the out.append(c) method.)
364     * @return The number of bytes which continue the byte sequence from here.
365     */
366    public int getNextBytes(Appendable out) /*const*/ {
367        int pos=pos_;
368        if(pos<0) {
369            return 0;
370        }
371        if(remainingMatchLength_>=0) {
372            append(out, bytes_[pos]&0xff);  // Next byte of a pending linear-match node.
373            return 1;
374        }
375        int node=bytes_[pos++]&0xff;
376        if(node>=kMinValueLead) {
377            if((node&kValueIsFinal)!=0) {
378                return 0;
379            } else {
380                pos=skipValue(pos, node);
381                node=bytes_[pos++]&0xff;
382                assert(node<kMinValueLead);
383            }
384        }
385        if(node<kMinLinearMatch) {
386            if(node==0) {
387                node=bytes_[pos++]&0xff;
388            }
389            getNextBranchBytes(bytes_, pos, ++node, out);
390            return node;
391        } else {
392            // First byte of the linear-match node.
393            append(out, bytes_[pos]&0xff);
394            return 1;
395        }
396    }
397
398    /**
399     * Iterates from the current state of this trie.
400     * @return A new BytesTrie.Iterator.
401     */
402    public Iterator iterator() {
403        return new Iterator(bytes_, pos_, remainingMatchLength_, 0);
404    }
405
406    /**
407     * Iterates from the current state of this trie.
408     * @param maxStringLength If 0, the iterator returns full strings/byte sequences.
409     *                        Otherwise, the iterator returns strings with this maximum length.
410     * @return A new BytesTrie.Iterator.
411     */
412    public Iterator iterator(int maxStringLength) {
413        return new Iterator(bytes_, pos_, remainingMatchLength_, maxStringLength);
414    }
415
416    /**
417     * Iterates from the root of a byte-serialized BytesTrie.
418     * @param trieBytes Bytes array that contains the serialized trie.
419     * @param offset Root offset of the trie in the array.
420     * @param maxStringLength If 0, the iterator returns full strings/byte sequences.
421     *                        Otherwise, the iterator returns strings with this maximum length.
422     * @return A new BytesTrie.Iterator.
423     */
424    public static Iterator iterator(byte[] trieBytes, int offset, int maxStringLength) {
425        return new Iterator(trieBytes, offset, -1, maxStringLength);
426    }
427
428    /**
429     * Return value type for the Iterator.
430     */
431    public static final class Entry {
432        private Entry(int capacity) {
433            bytes=new byte[capacity];
434        }
435
436        /**
437         * @return The length of the byte sequence.
438         */
439        public int bytesLength() { return length; }
440        /**
441         * Returns a byte of the byte sequence.
442         * @param index An index into the byte sequence.
443         * @return The index-th byte sequence byte.
444         */
445        public byte byteAt(int index) { return bytes[index]; }
446        /**
447         * Copies the byte sequence into a byte array.
448         * @param dest Destination byte array.
449         * @param destOffset Starting offset to where in dest the byte sequence is copied.
450         */
451        public void copyBytesTo(byte[] dest, int destOffset) {
452            System.arraycopy(bytes, 0, dest, destOffset, length);
453        }
454        /**
455         * @return The byte sequence as a read-only ByteBuffer.
456         */
457        public ByteBuffer bytesAsByteBuffer() {
458            return ByteBuffer.wrap(bytes, 0, length).asReadOnlyBuffer();
459        }
460
461        /**
462         * The value associated with the byte sequence.
463         */
464        public int value;
465
466        private void ensureCapacity(int len) {
467            if(bytes.length<len) {
468                byte[] newBytes=new byte[Math.min(2*bytes.length, 2*len)];
469                System.arraycopy(bytes, 0, newBytes, 0, length);
470                bytes=newBytes;
471            }
472        }
473        private void append(byte b) {
474            ensureCapacity(length+1);
475            bytes[length++]=b;
476        }
477        private void append(byte[] b, int off, int len) {
478            ensureCapacity(length+len);
479            System.arraycopy(b, off, bytes, length, len);
480            length+=len;
481        }
482        private void truncateString(int newLength) { length=newLength; }
483
484        private byte[] bytes;
485        private int length;
486    }
487
488    /**
489     * Iterator for all of the (byte sequence, value) pairs in a BytesTrie.
490     */
491    public static final class Iterator implements java.util.Iterator<Entry> {
492        private Iterator(byte[] trieBytes, int offset, int remainingMatchLength, int maxStringLength) {
493            bytes_=trieBytes;
494            pos_=initialPos_=offset;
495            remainingMatchLength_=initialRemainingMatchLength_=remainingMatchLength;
496            maxLength_=maxStringLength;
497            entry_=new Entry(maxLength_!=0 ? maxLength_ : 32);
498            int length=remainingMatchLength_;  // Actual remaining match length minus 1.
499            if(length>=0) {
500                // Pending linear-match node, append remaining bytes to entry_.
501                ++length;
502                if(maxLength_>0 && length>maxLength_) {
503                    length=maxLength_;  // This will leave remainingMatchLength>=0 as a signal.
504                }
505                entry_.append(bytes_, pos_, length);
506                pos_+=length;
507                remainingMatchLength_-=length;
508            }
509        }
510
511        /**
512         * Resets this iterator to its initial state.
513         * @return this
514         */
515        public Iterator reset() {
516            pos_=initialPos_;
517            remainingMatchLength_=initialRemainingMatchLength_;
518            int length=remainingMatchLength_+1;  // Remaining match length.
519            if(maxLength_>0 && length>maxLength_) {
520                length=maxLength_;
521            }
522            entry_.truncateString(length);
523            pos_+=length;
524            remainingMatchLength_-=length;
525            stack_.clear();
526            return this;
527        }
528
529        /**
530         * @return true if there are more elements.
531         */
532        public boolean hasNext() /*const*/ { return pos_>=0 || !stack_.isEmpty(); }
533
534        /**
535         * Finds the next (byte sequence, value) pair if there is one.
536         *
537         * If the byte sequence is truncated to the maximum length and does not
538         * have a real value, then the value is set to -1.
539         * In this case, this "not a real value" is indistinguishable from
540         * a real value of -1.
541         * @return An Entry with the string and value of the next element.
542         * @throws NoSuchElementException - iteration has no more elements.
543         */
544        public Entry next() {
545            int pos=pos_;
546            if(pos<0) {
547                if(stack_.isEmpty()) {
548                    throw new NoSuchElementException();
549                }
550                // Pop the state off the stack and continue with the next outbound edge of
551                // the branch node.
552                long top=stack_.remove(stack_.size()-1);
553                int length=(int)top;
554                pos=(int)(top>>32);
555                entry_.truncateString(length&0xffff);
556                length>>>=16;
557                if(length>1) {
558                    pos=branchNext(pos, length);
559                    if(pos<0) {
560                        return entry_;  // Reached a final value.
561                    }
562                } else {
563                    entry_.append(bytes_[pos++]);
564                }
565            }
566            if(remainingMatchLength_>=0) {
567                // We only get here if we started in a pending linear-match node
568                // with more than maxLength remaining bytes.
569                return truncateAndStop();
570            }
571            for(;;) {
572                int node=bytes_[pos++]&0xff;
573                if(node>=kMinValueLead) {
574                    // Deliver value for the byte sequence so far.
575                    boolean isFinal=(node&kValueIsFinal)!=0;
576                    entry_.value=readValue(bytes_, pos, node>>1);
577                    if(isFinal || (maxLength_>0 && entry_.length==maxLength_)) {
578                        pos_=-1;
579                    } else {
580                        pos_=skipValue(pos, node);
581                    }
582                    return entry_;
583                }
584                if(maxLength_>0 && entry_.length==maxLength_) {
585                    return truncateAndStop();
586                }
587                if(node<kMinLinearMatch) {
588                    if(node==0) {
589                        node=bytes_[pos++]&0xff;
590                    }
591                    pos=branchNext(pos, node+1);
592                    if(pos<0) {
593                        return entry_;  // Reached a final value.
594                    }
595                } else {
596                    // Linear-match node, append length bytes to entry_.
597                    int length=node-kMinLinearMatch+1;
598                    if(maxLength_>0 && entry_.length+length>maxLength_) {
599                        entry_.append(bytes_, pos, maxLength_-entry_.length);
600                        return truncateAndStop();
601                    }
602                    entry_.append(bytes_, pos, length);
603                    pos+=length;
604                }
605            }
606        }
607
608        /**
609         * Iterator.remove() is not supported.
610         * @throws UnsupportedOperationException (always)
611         */
612        public void remove() {
613            throw new UnsupportedOperationException();
614        }
615
616        private Entry truncateAndStop() {
617            pos_=-1;
618            entry_.value=-1;  // no real value for str
619            return entry_;
620        }
621
622        private int branchNext(int pos, int length) {
623            while(length>kMaxBranchLinearSubNodeLength) {
624                ++pos;  // ignore the comparison byte
625                // Push state for the greater-or-equal edge.
626                stack_.add(((long)skipDelta(bytes_, pos)<<32)|((length-(length>>1))<<16)|entry_.length);
627                // Follow the less-than edge.
628                length>>=1;
629                pos=jumpByDelta(bytes_, pos);
630            }
631            // List of key-value pairs where values are either final values or jump deltas.
632            // Read the first (key, value) pair.
633            byte trieByte=bytes_[pos++];
634            int node=bytes_[pos++]&0xff;
635            boolean isFinal=(node&kValueIsFinal)!=0;
636            int value=readValue(bytes_, pos, node>>1);
637            pos=skipValue(pos, node);
638            stack_.add(((long)pos<<32)|((length-1)<<16)|entry_.length);
639            entry_.append(trieByte);
640            if(isFinal) {
641                pos_=-1;
642                entry_.value=value;
643                return -1;
644            } else {
645                return pos+value;
646            }
647        }
648
649        private byte[] bytes_;
650        private int pos_;
651        private int initialPos_;
652        private int remainingMatchLength_;
653        private int initialRemainingMatchLength_;
654
655        private int maxLength_;
656        private Entry entry_;
657
658        // The stack stores longs for backtracking to another
659        // outbound edge of a branch node.
660        // Each long has the offset from bytes_ in bits 62..32,
661        // the entry_.stringLength() from before the node in bits 15..0,
662        // and the remaining branch length in bits 24..16. (Bits 31..25 are unused.)
663        // (We could store the remaining branch length minus 1 in bits 23..16 and not use bits 31..24,
664        // but the code looks more confusing that way.)
665        private ArrayList<Long> stack_=new ArrayList<Long>();
666    }
667
668    private void stop() {
669        pos_=-1;
670    }
671
672    // Reads a compact 32-bit integer.
673    // pos is already after the leadByte, and the lead byte is already shifted right by 1.
674    private static int readValue(byte[] bytes, int pos, int leadByte) {
675        int value;
676        if(leadByte<kMinTwoByteValueLead) {
677            value=leadByte-kMinOneByteValueLead;
678        } else if(leadByte<kMinThreeByteValueLead) {
679            value=((leadByte-kMinTwoByteValueLead)<<8)|(bytes[pos]&0xff);
680        } else if(leadByte<kFourByteValueLead) {
681            value=((leadByte-kMinThreeByteValueLead)<<16)|((bytes[pos]&0xff)<<8)|(bytes[pos+1]&0xff);
682        } else if(leadByte==kFourByteValueLead) {
683            value=((bytes[pos]&0xff)<<16)|((bytes[pos+1]&0xff)<<8)|(bytes[pos+2]&0xff);
684        } else {
685            value=(bytes[pos]<<24)|((bytes[pos+1]&0xff)<<16)|((bytes[pos+2]&0xff)<<8)|(bytes[pos+3]&0xff);
686        }
687        return value;
688    }
689    private static int skipValue(int pos, int leadByte) {
690        assert(leadByte>=kMinValueLead);
691        if(leadByte>=(kMinTwoByteValueLead<<1)) {
692            if(leadByte<(kMinThreeByteValueLead<<1)) {
693                ++pos;
694            } else if(leadByte<(kFourByteValueLead<<1)) {
695                pos+=2;
696            } else {
697                pos+=3+((leadByte>>1)&1);
698            }
699        }
700        return pos;
701    }
702    private static int skipValue(byte[] bytes, int pos) {
703        int leadByte=bytes[pos++]&0xff;
704        return skipValue(pos, leadByte);
705    }
706
707    // Reads a jump delta and jumps.
708    private static int jumpByDelta(byte[] bytes, int pos) {
709        int delta=bytes[pos++]&0xff;
710        if(delta<kMinTwoByteDeltaLead) {
711            // nothing to do
712        } else if(delta<kMinThreeByteDeltaLead) {
713            delta=((delta-kMinTwoByteDeltaLead)<<8)|(bytes[pos++]&0xff);
714        } else if(delta<kFourByteDeltaLead) {
715            delta=((delta-kMinThreeByteDeltaLead)<<16)|((bytes[pos]&0xff)<<8)|(bytes[pos+1]&0xff);
716            pos+=2;
717        } else if(delta==kFourByteDeltaLead) {
718            delta=((bytes[pos]&0xff)<<16)|((bytes[pos+1]&0xff)<<8)|(bytes[pos+2]&0xff);
719            pos+=3;
720        } else {
721            delta=(bytes[pos]<<24)|((bytes[pos+1]&0xff)<<16)|((bytes[pos+2]&0xff)<<8)|(bytes[pos+3]&0xff);
722            pos+=4;
723        }
724        return pos+delta;
725    }
726
727    private static int skipDelta(byte[] bytes, int pos) {
728        int delta=bytes[pos++]&0xff;
729        if(delta>=kMinTwoByteDeltaLead) {
730            if(delta<kMinThreeByteDeltaLead) {
731                ++pos;
732            } else if(delta<kFourByteDeltaLead) {
733                pos+=2;
734            } else {
735                pos+=3+(delta&1);
736            }
737        }
738        return pos;
739    }
740
741    private static Result[] valueResults_={ Result.INTERMEDIATE_VALUE, Result.FINAL_VALUE };
742
743    // Handles a branch node for both next(byte) and next(string).
744    private Result branchNext(int pos, int length, int inByte) {
745        // Branch according to the current byte.
746        if(length==0) {
747            length=bytes_[pos++]&0xff;
748        }
749        ++length;
750        // The length of the branch is the number of bytes to select from.
751        // The data structure encodes a binary search.
752        while(length>kMaxBranchLinearSubNodeLength) {
753            if(inByte<(bytes_[pos++]&0xff)) {
754                length>>=1;
755                pos=jumpByDelta(bytes_, pos);
756            } else {
757                length=length-(length>>1);
758                pos=skipDelta(bytes_, pos);
759            }
760        }
761        // Drop down to linear search for the last few bytes.
762        // length>=2 because the loop body above sees length>kMaxBranchLinearSubNodeLength>=3
763        // and divides length by 2.
764        do {
765            if(inByte==(bytes_[pos++]&0xff)) {
766                Result result;
767                int node=bytes_[pos]&0xff;
768                assert(node>=kMinValueLead);
769                if((node&kValueIsFinal)!=0) {
770                    // Leave the final value for getValue() to read.
771                    result=Result.FINAL_VALUE;
772                } else {
773                    // Use the non-final value as the jump delta.
774                    ++pos;
775                    // int delta=readValue(pos, node>>1);
776                    node>>=1;
777                    int delta;
778                    if(node<kMinTwoByteValueLead) {
779                        delta=node-kMinOneByteValueLead;
780                    } else if(node<kMinThreeByteValueLead) {
781                        delta=((node-kMinTwoByteValueLead)<<8)|(bytes_[pos++]&0xff);
782                    } else if(node<kFourByteValueLead) {
783                        delta=((node-kMinThreeByteValueLead)<<16)|((bytes_[pos]&0xff)<<8)|(bytes_[pos+1]&0xff);
784                        pos+=2;
785                    } else if(node==kFourByteValueLead) {
786                        delta=((bytes_[pos]&0xff)<<16)|((bytes_[pos+1]&0xff)<<8)|(bytes_[pos+2]&0xff);
787                        pos+=3;
788                    } else {
789                        delta=(bytes_[pos]<<24)|((bytes_[pos+1]&0xff)<<16)|((bytes_[pos+2]&0xff)<<8)|(bytes_[pos+3]&0xff);
790                        pos+=4;
791                    }
792                    // end readValue()
793                    pos+=delta;
794                    node=bytes_[pos]&0xff;
795                    result= node>=kMinValueLead ? valueResults_[node&kValueIsFinal] : Result.NO_VALUE;
796                }
797                pos_=pos;
798                return result;
799            }
800            --length;
801            pos=skipValue(bytes_, pos);
802        } while(length>1);
803        if(inByte==(bytes_[pos++]&0xff)) {
804            pos_=pos;
805            int node=bytes_[pos]&0xff;
806            return node>=kMinValueLead ? valueResults_[node&kValueIsFinal] : Result.NO_VALUE;
807        } else {
808            stop();
809            return Result.NO_MATCH;
810        }
811    }
812
813    // Requires remainingLength_<0.
814    private Result nextImpl(int pos, int inByte) {
815        for(;;) {
816            int node=bytes_[pos++]&0xff;
817            if(node<kMinLinearMatch) {
818                return branchNext(pos, node, inByte);
819            } else if(node<kMinValueLead) {
820                // Match the first of length+1 bytes.
821                int length=node-kMinLinearMatch;  // Actual match length minus 1.
822                if(inByte==(bytes_[pos++]&0xff)) {
823                    remainingMatchLength_=--length;
824                    pos_=pos;
825                    return (length<0 && (node=bytes_[pos]&0xff)>=kMinValueLead) ?
826                            valueResults_[node&kValueIsFinal] : Result.NO_VALUE;
827                } else {
828                    // No match.
829                    break;
830                }
831            } else if((node&kValueIsFinal)!=0) {
832                // No further matching bytes.
833                break;
834            } else {
835                // Skip intermediate value.
836                pos=skipValue(pos, node);
837                // The next node must not also be a value node.
838                assert((bytes_[pos]&0xff)<kMinValueLead);
839            }
840        }
841        stop();
842        return Result.NO_MATCH;
843    }
844
845    // Helper functions for getUniqueValue().
846    // Recursively finds a unique value (or whether there is not a unique one)
847    // from a branch.
848    // uniqueValue: On input, same as for getUniqueValue()/findUniqueValue().
849    // On return, if not 0, then bits 63..33 contain the updated non-negative pos.
850    private static long findUniqueValueFromBranch(byte[] bytes, int pos, int length,
851                                                  long uniqueValue) {
852        while(length>kMaxBranchLinearSubNodeLength) {
853            ++pos;  // ignore the comparison byte
854            uniqueValue=findUniqueValueFromBranch(bytes, jumpByDelta(bytes, pos), length>>1, uniqueValue);
855            if(uniqueValue==0) {
856                return 0;
857            }
858            length=length-(length>>1);
859            pos=skipDelta(bytes, pos);
860        }
861        do {
862            ++pos;  // ignore a comparison byte
863            // handle its value
864            int node=bytes[pos++]&0xff;
865            boolean isFinal=(node&kValueIsFinal)!=0;
866            int value=readValue(bytes, pos, node>>1);
867            pos=skipValue(pos, node);
868            if(isFinal) {
869                if(uniqueValue!=0) {
870                    if(value!=(int)(uniqueValue>>1)) {
871                        return 0;
872                    }
873                } else {
874                    uniqueValue=((long)value<<1)|1;
875                }
876            } else {
877                uniqueValue=findUniqueValue(bytes, pos+value, uniqueValue);
878                if(uniqueValue==0) {
879                    return 0;
880                }
881            }
882        } while(--length>1);
883        // ignore the last comparison byte
884        return ((long)(pos+1)<<33)|(uniqueValue&0x1ffffffffL);
885    }
886    // Recursively finds a unique value (or whether there is not a unique one)
887    // starting from a position on a node lead byte.
888    // uniqueValue: If there is one, then bits 32..1 contain the value and bit 0 is set.
889    // Otherwise, uniqueValue is 0. Bits 63..33 are ignored.
890    private static long findUniqueValue(byte[] bytes, int pos, long uniqueValue) {
891        for(;;) {
892            int node=bytes[pos++]&0xff;
893            if(node<kMinLinearMatch) {
894                if(node==0) {
895                    node=bytes[pos++]&0xff;
896                }
897                uniqueValue=findUniqueValueFromBranch(bytes, pos, node+1, uniqueValue);
898                if(uniqueValue==0) {
899                    return 0;
900                }
901                pos=(int)(uniqueValue>>>33);
902            } else if(node<kMinValueLead) {
903                // linear-match node
904                pos+=node-kMinLinearMatch+1;  // Ignore the match bytes.
905            } else {
906                boolean isFinal=(node&kValueIsFinal)!=0;
907                int value=readValue(bytes, pos, node>>1);
908                if(uniqueValue!=0) {
909                    if(value!=(int)(uniqueValue>>1)) {
910                        return 0;
911                    }
912                } else {
913                    uniqueValue=((long)value<<1)|1;
914                }
915                if(isFinal) {
916                    return uniqueValue;
917                }
918                pos=skipValue(pos, node);
919            }
920        }
921    }
922
923    // Helper functions for getNextBytes().
924    // getNextBytes() when pos is on a branch node.
925    private static void getNextBranchBytes(byte[] bytes, int pos, int length, Appendable out) {
926        while(length>kMaxBranchLinearSubNodeLength) {
927            ++pos;  // ignore the comparison byte
928            getNextBranchBytes(bytes, jumpByDelta(bytes, pos), length>>1, out);
929            length=length-(length>>1);
930            pos=skipDelta(bytes, pos);
931        }
932        do {
933            append(out, bytes[pos++]&0xff);
934            pos=skipValue(bytes, pos);
935        } while(--length>1);
936        append(out, bytes[pos]&0xff);
937    }
938    private static void append(Appendable out, int c) {
939        try {
940            out.append((char)c);
941        } catch(IOException e) {
942            throw new ICUUncheckedIOException(e);
943        }
944    }
945
946    // BytesTrie data structure
947    //
948    // The trie consists of a series of byte-serialized nodes for incremental
949    // string/byte sequence matching. The root node is at the beginning of the trie data.
950    //
951    // Types of nodes are distinguished by their node lead byte ranges.
952    // After each node, except a final-value node, another node follows to
953    // encode match values or continue matching further bytes.
954    //
955    // Node types:
956    //  - Value node: Stores a 32-bit integer in a compact, variable-length format.
957    //    The value is for the string/byte sequence so far.
958    //    One node bit indicates whether the value is final or whether
959    //    matching continues with the next node.
960    //  - Linear-match node: Matches a number of bytes.
961    //  - Branch node: Branches to other nodes according to the current input byte.
962    //    The node byte is the length of the branch (number of bytes to select from)
963    //    minus 1. It is followed by a sub-node:
964    //    - If the length is at most kMaxBranchLinearSubNodeLength, then
965    //      there are length-1 (key, value) pairs and then one more comparison byte.
966    //      If one of the key bytes matches, then the value is either a final value for
967    //      the string/byte sequence so far, or a "jump" delta to the next node.
968    //      If the last byte matches, then matching continues with the next node.
969    //      (Values have the same encoding as value nodes.)
970    //    - If the length is greater than kMaxBranchLinearSubNodeLength, then
971    //      there is one byte and one "jump" delta.
972    //      If the input byte is less than the sub-node byte, then "jump" by delta to
973    //      the next sub-node which will have a length of length/2.
974    //      (The delta has its own compact encoding.)
975    //      Otherwise, skip the "jump" delta to the next sub-node
976    //      which will have a length of length-length/2.
977
978    // Node lead byte values.
979
980    // 00..0f: Branch node. If node!=0 then the length is node+1, otherwise
981    // the length is one more than the next byte.
982
983    // For a branch sub-node with at most this many entries, we drop down
984    // to a linear search.
985    /*package*/ static final int kMaxBranchLinearSubNodeLength=5;
986
987    // 10..1f: Linear-match node, match 1..16 bytes and continue reading the next node.
988    /*package*/ static final int kMinLinearMatch=0x10;
989    /*package*/ static final int kMaxLinearMatchLength=0x10;
990
991    // 20..ff: Variable-length value node.
992    // If odd, the value is final. (Otherwise, intermediate value or jump delta.)
993    // Then shift-right by 1 bit.
994    // The remaining lead byte value indicates the number of following bytes (0..4)
995    // and contains the value's top bits.
996    /*package*/ static final int kMinValueLead=kMinLinearMatch+kMaxLinearMatchLength;  // 0x20
997    // It is a final value if bit 0 is set.
998    private static final int kValueIsFinal=1;
999
1000    // Compact value: After testing bit 0, shift right by 1 and then use the following thresholds.
1001    /*package*/ static final int kMinOneByteValueLead=kMinValueLead/2;  // 0x10
1002    /*package*/ static final int kMaxOneByteValue=0x40;  // At least 6 bits in the first byte.
1003
1004    /*package*/ static final int kMinTwoByteValueLead=kMinOneByteValueLead+kMaxOneByteValue+1;  // 0x51
1005    /*package*/ static final int kMaxTwoByteValue=0x1aff;
1006
1007    /*package*/ static final int kMinThreeByteValueLead=kMinTwoByteValueLead+(kMaxTwoByteValue>>8)+1;  // 0x6c
1008    /*package*/ static final int kFourByteValueLead=0x7e;
1009
1010    // A little more than Unicode code points. (0x11ffff)
1011    /*package*/ static final int kMaxThreeByteValue=((kFourByteValueLead-kMinThreeByteValueLead)<<16)-1;
1012
1013    /*package*/ static final int kFiveByteValueLead=0x7f;
1014
1015    // Compact delta integers.
1016    /*package*/ static final int kMaxOneByteDelta=0xbf;
1017    /*package*/ static final int kMinTwoByteDeltaLead=kMaxOneByteDelta+1;  // 0xc0
1018    /*package*/ static final int kMinThreeByteDeltaLead=0xf0;
1019    /*package*/ static final int kFourByteDeltaLead=0xfe;
1020    /*package*/ static final int kFiveByteDeltaLead=0xff;
1021
1022    /*package*/ static final int kMaxTwoByteDelta=((kMinThreeByteDeltaLead-kMinTwoByteDeltaLead)<<8)-1;  // 0x2fff
1023    /*package*/ static final int kMaxThreeByteDelta=((kFourByteDeltaLead-kMinThreeByteDeltaLead)<<16)-1;  // 0xdffff
1024
1025    // Fixed value referencing the BytesTrie bytes.
1026    private byte[] bytes_;
1027    private int root_;
1028
1029    // Iterator variables.
1030
1031    // Index of next trie byte to read. Negative if no more matches.
1032    private int pos_;
1033    // Remaining length of a linear-match node, minus 1. Negative if not in such a node.
1034    private int remainingMatchLength_;
1035};
1036