1/*
2*******************************************************************************
3*   Copyright (C) 2010-2014, International Business Machines
4*   Corporation and others.  All Rights Reserved.
5*******************************************************************************
6*   created on: 2010nov23
7*   created by: Markus W. Scherer
8*   ported from ICU4C bytestrie.h/.cpp
9*/
10package com.ibm.icu.util;
11
12import java.io.IOException;
13import java.nio.ByteBuffer;
14import java.util.ArrayList;
15import java.util.NoSuchElementException;
16
17/**
18 * Light-weight, non-const reader class for a BytesTrie.
19 * Traverses a byte-serialized data structure with minimal state,
20 * for mapping byte sequences to non-negative integer values.
21 *
22 * <p>This class is not intended for public subclassing.
23 *
24 * @stable ICU 4.8
25 * @author Markus W. Scherer
26 */
27public final class BytesTrie implements Cloneable, Iterable<BytesTrie.Entry> {
28    /**
29     * Constructs a BytesTrie reader instance.
30     *
31     * <p>The array must contain a copy of a byte sequence from the BytesTrieBuilder,
32     * with the offset indicating the first byte of that sequence.
33     * The BytesTrie object will not read more bytes than
34     * the BytesTrieBuilder generated in the corresponding build() call.
35     *
36     * <p>The array is not copied/cloned and must not be modified while
37     * the BytesTrie object is in use.
38     *
39     * @param trieBytes Bytes array that contains the serialized trie.
40     * @param offset Root offset of the trie in the array.
41     * @stable ICU 4.8
42     */
43    public BytesTrie(byte[] trieBytes, int offset) {
44        bytes_=trieBytes;
45        pos_=root_=offset;
46        remainingMatchLength_=-1;
47    }
48
49    /**
50     * Clones this trie reader object and its state,
51     * but not the byte array which will be shared.
52     * @return A shallow clone of this trie.
53     * @stable ICU 4.8
54     */
55    @Override
56    public Object clone() throws CloneNotSupportedException {
57        return super.clone();  // A shallow copy is just what we need.
58    }
59
60    /**
61     * Resets this trie to its initial state.
62     * @return this
63     * @stable ICU 4.8
64     */
65    public BytesTrie reset() {
66        pos_=root_;
67        remainingMatchLength_=-1;
68        return this;
69    }
70
71    /**
72     * BytesTrie state object, for saving a trie's current state
73     * and resetting the trie back to this state later.
74     * @stable ICU 4.8
75     */
76    public static final class State {
77        /**
78         * Constructs an empty State.
79         * @stable ICU 4.8
80         */
81        public State() {}
82        private byte[] bytes;
83        private int root;
84        private int pos;
85        private int remainingMatchLength;
86    }
87
88    /**
89     * Saves the state of this trie.
90     * @param state The State object to hold the trie's state.
91     * @return this
92     * @see #resetToState
93     * @stable ICU 4.8
94     */
95    public BytesTrie saveState(State state) /*const*/ {
96        state.bytes=bytes_;
97        state.root=root_;
98        state.pos=pos_;
99        state.remainingMatchLength=remainingMatchLength_;
100        return this;
101    }
102
103    /**
104     * Resets this trie to the saved state.
105     * @param state The State object which holds a saved trie state.
106     * @return this
107     * @throws IllegalArgumentException if the state object contains no state,
108     *         or the state of a different trie
109     * @see #saveState
110     * @see #reset
111     * @stable ICU 4.8
112     */
113    public BytesTrie resetToState(State state) {
114        if(bytes_==state.bytes && bytes_!=null && root_==state.root) {
115            pos_=state.pos;
116            remainingMatchLength_=state.remainingMatchLength;
117        } else {
118            throw new IllegalArgumentException("incompatible trie state");
119        }
120        return this;
121    }
122
123    /**
124     * Return values for BytesTrie.next(), CharsTrie.next() and similar methods.
125     * @stable ICU 4.8
126     */
127    public enum Result {
128        /**
129         * The input unit(s) did not continue a matching string.
130         * Once current()/next() return NO_MATCH,
131         * all further calls to current()/next() will also return NO_MATCH,
132         * until the trie is reset to its original state or to a saved state.
133         * @stable ICU 4.8
134         */
135        NO_MATCH,
136        /**
137         * The input unit(s) continued a matching string
138         * but there is no value for the string so far.
139         * (It is a prefix of a longer string.)
140         * @stable ICU 4.8
141         */
142        NO_VALUE,
143        /**
144         * The input unit(s) continued a matching string
145         * and there is a value for the string so far.
146         * This value will be returned by getValue().
147         * No further input byte/unit can continue a matching string.
148         * @stable ICU 4.8
149         */
150        FINAL_VALUE,
151        /**
152         * The input unit(s) continued a matching string
153         * and there is a value for the string so far.
154         * This value will be returned by getValue().
155         * Another input byte/unit can continue a matching string.
156         * @stable ICU 4.8
157         */
158        INTERMEDIATE_VALUE;
159
160        // Note: The following methods assume the particular order
161        // of enum constants, treating the ordinal() values like bit sets.
162        // Do not reorder the enum constants!
163
164        /**
165         * Same as (result!=NO_MATCH).
166         * @return true if the input bytes/units so far are part of a matching string/byte sequence.
167         * @stable ICU 4.8
168         */
169        public boolean matches() { return this!=NO_MATCH; }
170
171        /**
172         * Equivalent to (result==INTERMEDIATE_VALUE || result==FINAL_VALUE).
173         * @return true if there is a value for the input bytes/units so far.
174         * @see #getValue
175         * @stable ICU 4.8
176         */
177        public boolean hasValue() { return ordinal()>=2; }
178
179        /**
180         * Equivalent to (result==NO_VALUE || result==INTERMEDIATE_VALUE).
181         * @return true if another input byte/unit can continue a matching string.
182         * @stable ICU 4.8
183         */
184        public boolean hasNext() { return (ordinal()&1)!=0; }
185    }
186
187    /**
188     * Determines whether the byte sequence so far matches, whether it has a value,
189     * and whether another input byte can continue a matching byte sequence.
190     * @return The match/value Result.
191     * @stable ICU 4.8
192     */
193    public Result current() /*const*/ {
194        int pos=pos_;
195        if(pos<0) {
196            return Result.NO_MATCH;
197        } else {
198            int node;
199            return (remainingMatchLength_<0 && (node=bytes_[pos]&0xff)>=kMinValueLead) ?
200                    valueResults_[node&kValueIsFinal] : Result.NO_VALUE;
201        }
202    }
203
204    /**
205     * Traverses the trie from the initial state for this input byte.
206     * Equivalent to reset().next(inByte).
207     * @param inByte Input byte value. Values -0x100..-1 are treated like 0..0xff.
208     *               Values below -0x100 and above 0xff will never match.
209     * @return The match/value Result.
210     * @stable ICU 4.8
211     */
212    public Result first(int inByte) {
213        remainingMatchLength_=-1;
214        if(inByte<0) {
215            inByte+=0x100;
216        }
217        return nextImpl(root_, inByte);
218    }
219
220    /**
221     * Traverses the trie from the current state for this input byte.
222     * @param inByte Input byte value. Values -0x100..-1 are treated like 0..0xff.
223     *               Values below -0x100 and above 0xff will never match.
224     * @return The match/value Result.
225     * @stable ICU 4.8
226     */
227    public Result next(int inByte) {
228        int pos=pos_;
229        if(pos<0) {
230            return Result.NO_MATCH;
231        }
232        if(inByte<0) {
233            inByte+=0x100;
234        }
235        int length=remainingMatchLength_;  // Actual remaining match length minus 1.
236        if(length>=0) {
237            // Remaining part of a linear-match node.
238            if(inByte==(bytes_[pos++]&0xff)) {
239                remainingMatchLength_=--length;
240                pos_=pos;
241                int node;
242                return (length<0 && (node=bytes_[pos]&0xff)>=kMinValueLead) ?
243                        valueResults_[node&kValueIsFinal] : Result.NO_VALUE;
244            } else {
245                stop();
246                return Result.NO_MATCH;
247            }
248        }
249        return nextImpl(pos, inByte);
250    }
251
252    /**
253     * Traverses the trie from the current state for this byte sequence.
254     * Equivalent to
255     * <pre>
256     * Result result=current();
257     * for(each c in s)
258     *   if(!result.hasNext()) return Result.NO_MATCH;
259     *   result=next(c);
260     * return result;
261     * </pre>
262     * @param s Contains a string or byte sequence.
263     * @param sIndex The start index of the byte sequence in s.
264     * @param sLimit The (exclusive) end index of the byte sequence in s.
265     * @return The match/value Result.
266     * @stable ICU 4.8
267     */
268    public Result next(byte[] s, int sIndex, int sLimit) {
269        if(sIndex>=sLimit) {
270            // Empty input.
271            return current();
272        }
273        int pos=pos_;
274        if(pos<0) {
275            return Result.NO_MATCH;
276        }
277        int length=remainingMatchLength_;  // Actual remaining match length minus 1.
278        for(;;) {
279            // Fetch the next input byte, if there is one.
280            // Continue a linear-match node.
281            byte inByte;
282            for(;;) {
283                if(sIndex==sLimit) {
284                    remainingMatchLength_=length;
285                    pos_=pos;
286                    int node;
287                    return (length<0 && (node=(bytes_[pos]&0xff))>=kMinValueLead) ?
288                            valueResults_[node&kValueIsFinal] : Result.NO_VALUE;
289                }
290                inByte=s[sIndex++];
291                if(length<0) {
292                    remainingMatchLength_=length;
293                    break;
294                }
295                if(inByte!=bytes_[pos]) {
296                    stop();
297                    return Result.NO_MATCH;
298                }
299                ++pos;
300                --length;
301            }
302            for(;;) {
303                int node=bytes_[pos++]&0xff;
304                if(node<kMinLinearMatch) {
305                    Result result=branchNext(pos, node, inByte&0xff);
306                    if(result==Result.NO_MATCH) {
307                        return Result.NO_MATCH;
308                    }
309                    // Fetch the next input byte, if there is one.
310                    if(sIndex==sLimit) {
311                        return result;
312                    }
313                    if(result==Result.FINAL_VALUE) {
314                        // No further matching bytes.
315                        stop();
316                        return Result.NO_MATCH;
317                    }
318                    inByte=s[sIndex++];
319                    pos=pos_;  // branchNext() advanced pos and wrote it to pos_ .
320                } else if(node<kMinValueLead) {
321                    // Match length+1 bytes.
322                    length=node-kMinLinearMatch;  // Actual match length minus 1.
323                    if(inByte!=bytes_[pos]) {
324                        stop();
325                        return Result.NO_MATCH;
326                    }
327                    ++pos;
328                    --length;
329                    break;
330                } else if((node&kValueIsFinal)!=0) {
331                    // No further matching bytes.
332                    stop();
333                    return Result.NO_MATCH;
334                } else {
335                    // Skip intermediate value.
336                    pos=skipValue(pos, node);
337                    // The next node must not also be a value node.
338                    assert((bytes_[pos]&0xff)<kMinValueLead);
339                }
340            }
341        }
342    }
343
344    /**
345     * Returns a matching byte sequence's value if called immediately after
346     * current()/first()/next() returned Result.INTERMEDIATE_VALUE or Result.FINAL_VALUE.
347     * getValue() can be called multiple times.
348     *
349     * Do not call getValue() after Result.NO_MATCH or Result.NO_VALUE!
350     * @return The value for the byte sequence so far.
351     * @stable ICU 4.8
352     */
353    public int getValue() /*const*/ {
354        int pos=pos_;
355        int leadByte=bytes_[pos++]&0xff;
356        assert(leadByte>=kMinValueLead);
357        return readValue(bytes_, pos, leadByte>>1);
358    }
359
360    /**
361     * Determines whether all byte sequences reachable from the current state
362     * map to the same value, and if so, returns that value.
363     * @return The unique value in bits 32..1 with bit 0 set,
364     *         if all byte sequences reachable from the current state
365     *         map to the same value; otherwise returns 0.
366     * @stable ICU 4.8
367     */
368    public long getUniqueValue() /*const*/ {
369        int pos=pos_;
370        if(pos<0) {
371            return 0;
372        }
373        // Skip the rest of a pending linear-match node.
374        long uniqueValue=findUniqueValue(bytes_, pos+remainingMatchLength_+1, 0);
375        // Ignore internally used bits 63..33; extend the actual value's sign bit from bit 32.
376        return (uniqueValue<<31)>>31;
377    }
378
379    /**
380     * Finds each byte which continues the byte sequence from the current state.
381     * That is, each byte b for which it would be next(b)!=Result.NO_MATCH now.
382     * @param out Each next byte is 0-extended to a char and appended to this object.
383     *            (Only uses the out.append(c) method.)
384     * @return The number of bytes which continue the byte sequence from here.
385     * @stable ICU 4.8
386     */
387    public int getNextBytes(Appendable out) /*const*/ {
388        int pos=pos_;
389        if(pos<0) {
390            return 0;
391        }
392        if(remainingMatchLength_>=0) {
393            append(out, bytes_[pos]&0xff);  // Next byte of a pending linear-match node.
394            return 1;
395        }
396        int node=bytes_[pos++]&0xff;
397        if(node>=kMinValueLead) {
398            if((node&kValueIsFinal)!=0) {
399                return 0;
400            } else {
401                pos=skipValue(pos, node);
402                node=bytes_[pos++]&0xff;
403                assert(node<kMinValueLead);
404            }
405        }
406        if(node<kMinLinearMatch) {
407            if(node==0) {
408                node=bytes_[pos++]&0xff;
409            }
410            getNextBranchBytes(bytes_, pos, ++node, out);
411            return node;
412        } else {
413            // First byte of the linear-match node.
414            append(out, bytes_[pos]&0xff);
415            return 1;
416        }
417    }
418
419    /**
420     * Iterates from the current state of this trie.
421     * @return A new BytesTrie.Iterator.
422     * @stable ICU 4.8
423     */
424    public Iterator iterator() {
425        return new Iterator(bytes_, pos_, remainingMatchLength_, 0);
426    }
427
428    /**
429     * Iterates from the current state of this trie.
430     * @param maxStringLength If 0, the iterator returns full strings/byte sequences.
431     *                        Otherwise, the iterator returns strings with this maximum length.
432     * @return A new BytesTrie.Iterator.
433     * @stable ICU 4.8
434     */
435    public Iterator iterator(int maxStringLength) {
436        return new Iterator(bytes_, pos_, remainingMatchLength_, maxStringLength);
437    }
438
439    /**
440     * Iterates from the root of a byte-serialized BytesTrie.
441     * @param trieBytes Bytes array that contains the serialized trie.
442     * @param offset Root offset of the trie in the array.
443     * @param maxStringLength If 0, the iterator returns full strings/byte sequences.
444     *                        Otherwise, the iterator returns strings with this maximum length.
445     * @return A new BytesTrie.Iterator.
446     * @stable ICU 4.8
447     */
448    public static Iterator iterator(byte[] trieBytes, int offset, int maxStringLength) {
449        return new Iterator(trieBytes, offset, -1, maxStringLength);
450    }
451
452    /**
453     * Return value type for the Iterator.
454     * @stable ICU 4.8
455     */
456    public static final class Entry {
457        private Entry(int capacity) {
458            bytes=new byte[capacity];
459        }
460
461        /**
462         * @return The length of the byte sequence.
463         * @stable ICU 4.8
464         */
465        public int bytesLength() { return length; }
466        /**
467         * Returns a byte of the byte sequence.
468         * @param index An index into the byte sequence.
469         * @return The index-th byte sequence byte.
470         * @stable ICU 4.8
471         */
472        public byte byteAt(int index) { return bytes[index]; }
473        /**
474         * Copies the byte sequence into a byte array.
475         * @param dest Destination byte array.
476         * @param destOffset Starting offset to where in dest the byte sequence is copied.
477         * @stable ICU 4.8
478         */
479        public void copyBytesTo(byte[] dest, int destOffset) {
480            System.arraycopy(bytes, 0, dest, destOffset, length);
481        }
482        /**
483         * @return The byte sequence as a read-only ByteBuffer.
484         * @stable ICU 4.8
485         */
486        public ByteBuffer bytesAsByteBuffer() {
487            return ByteBuffer.wrap(bytes, 0, length).asReadOnlyBuffer();
488        }
489
490        /**
491         * The value associated with the byte sequence.
492         * @stable ICU 4.8
493         */
494        public int value;
495
496        private void ensureCapacity(int len) {
497            if(bytes.length<len) {
498                byte[] newBytes=new byte[Math.min(2*bytes.length, 2*len)];
499                System.arraycopy(bytes, 0, newBytes, 0, length);
500                bytes=newBytes;
501            }
502        }
503        private void append(byte b) {
504            ensureCapacity(length+1);
505            bytes[length++]=b;
506        }
507        private void append(byte[] b, int off, int len) {
508            ensureCapacity(length+len);
509            System.arraycopy(b, off, bytes, length, len);
510            length+=len;
511        }
512        private void truncateString(int newLength) { length=newLength; }
513
514        private byte[] bytes;
515        private int length;
516    }
517
518    /**
519     * Iterator for all of the (byte sequence, value) pairs in a BytesTrie.
520     * @stable ICU 4.8
521     */
522    public static final class Iterator implements java.util.Iterator<Entry> {
523        private Iterator(byte[] trieBytes, int offset, int remainingMatchLength, int maxStringLength) {
524            bytes_=trieBytes;
525            pos_=initialPos_=offset;
526            remainingMatchLength_=initialRemainingMatchLength_=remainingMatchLength;
527            maxLength_=maxStringLength;
528            entry_=new Entry(maxLength_!=0 ? maxLength_ : 32);
529            int length=remainingMatchLength_;  // Actual remaining match length minus 1.
530            if(length>=0) {
531                // Pending linear-match node, append remaining bytes to entry_.
532                ++length;
533                if(maxLength_>0 && length>maxLength_) {
534                    length=maxLength_;  // This will leave remainingMatchLength>=0 as a signal.
535                }
536                entry_.append(bytes_, pos_, length);
537                pos_+=length;
538                remainingMatchLength_-=length;
539            }
540        }
541
542        /**
543         * Resets this iterator to its initial state.
544         * @return this
545         * @stable ICU 4.8
546         */
547        public Iterator reset() {
548            pos_=initialPos_;
549            remainingMatchLength_=initialRemainingMatchLength_;
550            int length=remainingMatchLength_+1;  // Remaining match length.
551            if(maxLength_>0 && length>maxLength_) {
552                length=maxLength_;
553            }
554            entry_.truncateString(length);
555            pos_+=length;
556            remainingMatchLength_-=length;
557            stack_.clear();
558            return this;
559        }
560
561        /**
562         * @return true if there are more elements.
563         * @stable ICU 4.8
564         */
565        public boolean hasNext() /*const*/ { return pos_>=0 || !stack_.isEmpty(); }
566
567        /**
568         * Finds the next (byte sequence, value) pair if there is one.
569         *
570         * If the byte sequence is truncated to the maximum length and does not
571         * have a real value, then the value is set to -1.
572         * In this case, this "not a real value" is indistinguishable from
573         * a real value of -1.
574         * @return An Entry with the string and value of the next element.
575         * @throws NoSuchElementException - iteration has no more elements.
576         * @stable ICU 4.8
577         */
578        public Entry next() {
579            int pos=pos_;
580            if(pos<0) {
581                if(stack_.isEmpty()) {
582                    throw new NoSuchElementException();
583                }
584                // Pop the state off the stack and continue with the next outbound edge of
585                // the branch node.
586                long top=stack_.remove(stack_.size()-1);
587                int length=(int)top;
588                pos=(int)(top>>32);
589                entry_.truncateString(length&0xffff);
590                length>>>=16;
591                if(length>1) {
592                    pos=branchNext(pos, length);
593                    if(pos<0) {
594                        return entry_;  // Reached a final value.
595                    }
596                } else {
597                    entry_.append(bytes_[pos++]);
598                }
599            }
600            if(remainingMatchLength_>=0) {
601                // We only get here if we started in a pending linear-match node
602                // with more than maxLength remaining bytes.
603                return truncateAndStop();
604            }
605            for(;;) {
606                int node=bytes_[pos++]&0xff;
607                if(node>=kMinValueLead) {
608                    // Deliver value for the byte sequence so far.
609                    boolean isFinal=(node&kValueIsFinal)!=0;
610                    entry_.value=readValue(bytes_, pos, node>>1);
611                    if(isFinal || (maxLength_>0 && entry_.length==maxLength_)) {
612                        pos_=-1;
613                    } else {
614                        pos_=skipValue(pos, node);
615                    }
616                    return entry_;
617                }
618                if(maxLength_>0 && entry_.length==maxLength_) {
619                    return truncateAndStop();
620                }
621                if(node<kMinLinearMatch) {
622                    if(node==0) {
623                        node=bytes_[pos++]&0xff;
624                    }
625                    pos=branchNext(pos, node+1);
626                    if(pos<0) {
627                        return entry_;  // Reached a final value.
628                    }
629                } else {
630                    // Linear-match node, append length bytes to entry_.
631                    int length=node-kMinLinearMatch+1;
632                    if(maxLength_>0 && entry_.length+length>maxLength_) {
633                        entry_.append(bytes_, pos, maxLength_-entry_.length);
634                        return truncateAndStop();
635                    }
636                    entry_.append(bytes_, pos, length);
637                    pos+=length;
638                }
639            }
640        }
641
642        /**
643         * Iterator.remove() is not supported.
644         * @throws UnsupportedOperationException (always)
645         * @stable ICU 4.8
646         */
647        public void remove() {
648            throw new UnsupportedOperationException();
649        }
650
651        private Entry truncateAndStop() {
652            pos_=-1;
653            entry_.value=-1;  // no real value for str
654            return entry_;
655        }
656
657        private int branchNext(int pos, int length) {
658            while(length>kMaxBranchLinearSubNodeLength) {
659                ++pos;  // ignore the comparison byte
660                // Push state for the greater-or-equal edge.
661                stack_.add(((long)skipDelta(bytes_, pos)<<32)|((length-(length>>1))<<16)|entry_.length);
662                // Follow the less-than edge.
663                length>>=1;
664                pos=jumpByDelta(bytes_, pos);
665            }
666            // List of key-value pairs where values are either final values or jump deltas.
667            // Read the first (key, value) pair.
668            byte trieByte=bytes_[pos++];
669            int node=bytes_[pos++]&0xff;
670            boolean isFinal=(node&kValueIsFinal)!=0;
671            int value=readValue(bytes_, pos, node>>1);
672            pos=skipValue(pos, node);
673            stack_.add(((long)pos<<32)|((length-1)<<16)|entry_.length);
674            entry_.append(trieByte);
675            if(isFinal) {
676                pos_=-1;
677                entry_.value=value;
678                return -1;
679            } else {
680                return pos+value;
681            }
682        }
683
684        private byte[] bytes_;
685        private int pos_;
686        private int initialPos_;
687        private int remainingMatchLength_;
688        private int initialRemainingMatchLength_;
689
690        private int maxLength_;
691        private Entry entry_;
692
693        // The stack stores longs for backtracking to another
694        // outbound edge of a branch node.
695        // Each long has the offset from bytes_ in bits 62..32,
696        // the entry_.stringLength() from before the node in bits 15..0,
697        // and the remaining branch length in bits 24..16. (Bits 31..25 are unused.)
698        // (We could store the remaining branch length minus 1 in bits 23..16 and not use bits 31..24,
699        // but the code looks more confusing that way.)
700        private ArrayList<Long> stack_=new ArrayList<Long>();
701    }
702
703    private void stop() {
704        pos_=-1;
705    }
706
707    // Reads a compact 32-bit integer.
708    // pos is already after the leadByte, and the lead byte is already shifted right by 1.
709    private static int readValue(byte[] bytes, int pos, int leadByte) {
710        int value;
711        if(leadByte<kMinTwoByteValueLead) {
712            value=leadByte-kMinOneByteValueLead;
713        } else if(leadByte<kMinThreeByteValueLead) {
714            value=((leadByte-kMinTwoByteValueLead)<<8)|(bytes[pos]&0xff);
715        } else if(leadByte<kFourByteValueLead) {
716            value=((leadByte-kMinThreeByteValueLead)<<16)|((bytes[pos]&0xff)<<8)|(bytes[pos+1]&0xff);
717        } else if(leadByte==kFourByteValueLead) {
718            value=((bytes[pos]&0xff)<<16)|((bytes[pos+1]&0xff)<<8)|(bytes[pos+2]&0xff);
719        } else {
720            value=(bytes[pos]<<24)|((bytes[pos+1]&0xff)<<16)|((bytes[pos+2]&0xff)<<8)|(bytes[pos+3]&0xff);
721        }
722        return value;
723    }
724    private static int skipValue(int pos, int leadByte) {
725        assert(leadByte>=kMinValueLead);
726        if(leadByte>=(kMinTwoByteValueLead<<1)) {
727            if(leadByte<(kMinThreeByteValueLead<<1)) {
728                ++pos;
729            } else if(leadByte<(kFourByteValueLead<<1)) {
730                pos+=2;
731            } else {
732                pos+=3+((leadByte>>1)&1);
733            }
734        }
735        return pos;
736    }
737    private static int skipValue(byte[] bytes, int pos) {
738        int leadByte=bytes[pos++]&0xff;
739        return skipValue(pos, leadByte);
740    }
741
742    // Reads a jump delta and jumps.
743    private static int jumpByDelta(byte[] bytes, int pos) {
744        int delta=bytes[pos++]&0xff;
745        if(delta<kMinTwoByteDeltaLead) {
746            // nothing to do
747        } else if(delta<kMinThreeByteDeltaLead) {
748            delta=((delta-kMinTwoByteDeltaLead)<<8)|(bytes[pos++]&0xff);
749        } else if(delta<kFourByteDeltaLead) {
750            delta=((delta-kMinThreeByteDeltaLead)<<16)|((bytes[pos]&0xff)<<8)|(bytes[pos+1]&0xff);
751            pos+=2;
752        } else if(delta==kFourByteDeltaLead) {
753            delta=((bytes[pos]&0xff)<<16)|((bytes[pos+1]&0xff)<<8)|(bytes[pos+2]&0xff);
754            pos+=3;
755        } else {
756            delta=(bytes[pos]<<24)|((bytes[pos+1]&0xff)<<16)|((bytes[pos+2]&0xff)<<8)|(bytes[pos+3]&0xff);
757            pos+=4;
758        }
759        return pos+delta;
760    }
761
762    private static int skipDelta(byte[] bytes, int pos) {
763        int delta=bytes[pos++]&0xff;
764        if(delta>=kMinTwoByteDeltaLead) {
765            if(delta<kMinThreeByteDeltaLead) {
766                ++pos;
767            } else if(delta<kFourByteDeltaLead) {
768                pos+=2;
769            } else {
770                pos+=3+(delta&1);
771            }
772        }
773        return pos;
774    }
775
776    private static Result[] valueResults_={ Result.INTERMEDIATE_VALUE, Result.FINAL_VALUE };
777
778    // Handles a branch node for both next(byte) and next(string).
779    private Result branchNext(int pos, int length, int inByte) {
780        // Branch according to the current byte.
781        if(length==0) {
782            length=bytes_[pos++]&0xff;
783        }
784        ++length;
785        // The length of the branch is the number of bytes to select from.
786        // The data structure encodes a binary search.
787        while(length>kMaxBranchLinearSubNodeLength) {
788            if(inByte<(bytes_[pos++]&0xff)) {
789                length>>=1;
790                pos=jumpByDelta(bytes_, pos);
791            } else {
792                length=length-(length>>1);
793                pos=skipDelta(bytes_, pos);
794            }
795        }
796        // Drop down to linear search for the last few bytes.
797        // length>=2 because the loop body above sees length>kMaxBranchLinearSubNodeLength>=3
798        // and divides length by 2.
799        do {
800            if(inByte==(bytes_[pos++]&0xff)) {
801                Result result;
802                int node=bytes_[pos]&0xff;
803                assert(node>=kMinValueLead);
804                if((node&kValueIsFinal)!=0) {
805                    // Leave the final value for getValue() to read.
806                    result=Result.FINAL_VALUE;
807                } else {
808                    // Use the non-final value as the jump delta.
809                    ++pos;
810                    // int delta=readValue(pos, node>>1);
811                    node>>=1;
812                    int delta;
813                    if(node<kMinTwoByteValueLead) {
814                        delta=node-kMinOneByteValueLead;
815                    } else if(node<kMinThreeByteValueLead) {
816                        delta=((node-kMinTwoByteValueLead)<<8)|(bytes_[pos++]&0xff);
817                    } else if(node<kFourByteValueLead) {
818                        delta=((node-kMinThreeByteValueLead)<<16)|((bytes_[pos]&0xff)<<8)|(bytes_[pos+1]&0xff);
819                        pos+=2;
820                    } else if(node==kFourByteValueLead) {
821                        delta=((bytes_[pos]&0xff)<<16)|((bytes_[pos+1]&0xff)<<8)|(bytes_[pos+2]&0xff);
822                        pos+=3;
823                    } else {
824                        delta=(bytes_[pos]<<24)|((bytes_[pos+1]&0xff)<<16)|((bytes_[pos+2]&0xff)<<8)|(bytes_[pos+3]&0xff);
825                        pos+=4;
826                    }
827                    // end readValue()
828                    pos+=delta;
829                    node=bytes_[pos]&0xff;
830                    result= node>=kMinValueLead ? valueResults_[node&kValueIsFinal] : Result.NO_VALUE;
831                }
832                pos_=pos;
833                return result;
834            }
835            --length;
836            pos=skipValue(bytes_, pos);
837        } while(length>1);
838        if(inByte==(bytes_[pos++]&0xff)) {
839            pos_=pos;
840            int node=bytes_[pos]&0xff;
841            return node>=kMinValueLead ? valueResults_[node&kValueIsFinal] : Result.NO_VALUE;
842        } else {
843            stop();
844            return Result.NO_MATCH;
845        }
846    }
847
848    // Requires remainingLength_<0.
849    private Result nextImpl(int pos, int inByte) {
850        for(;;) {
851            int node=bytes_[pos++]&0xff;
852            if(node<kMinLinearMatch) {
853                return branchNext(pos, node, inByte);
854            } else if(node<kMinValueLead) {
855                // Match the first of length+1 bytes.
856                int length=node-kMinLinearMatch;  // Actual match length minus 1.
857                if(inByte==(bytes_[pos++]&0xff)) {
858                    remainingMatchLength_=--length;
859                    pos_=pos;
860                    return (length<0 && (node=bytes_[pos]&0xff)>=kMinValueLead) ?
861                            valueResults_[node&kValueIsFinal] : Result.NO_VALUE;
862                } else {
863                    // No match.
864                    break;
865                }
866            } else if((node&kValueIsFinal)!=0) {
867                // No further matching bytes.
868                break;
869            } else {
870                // Skip intermediate value.
871                pos=skipValue(pos, node);
872                // The next node must not also be a value node.
873                assert((bytes_[pos]&0xff)<kMinValueLead);
874            }
875        }
876        stop();
877        return Result.NO_MATCH;
878    }
879
880    // Helper functions for getUniqueValue().
881    // Recursively finds a unique value (or whether there is not a unique one)
882    // from a branch.
883    // uniqueValue: On input, same as for getUniqueValue()/findUniqueValue().
884    // On return, if not 0, then bits 63..33 contain the updated non-negative pos.
885    private static long findUniqueValueFromBranch(byte[] bytes, int pos, int length,
886                                                  long uniqueValue) {
887        while(length>kMaxBranchLinearSubNodeLength) {
888            ++pos;  // ignore the comparison byte
889            uniqueValue=findUniqueValueFromBranch(bytes, jumpByDelta(bytes, pos), length>>1, uniqueValue);
890            if(uniqueValue==0) {
891                return 0;
892            }
893            length=length-(length>>1);
894            pos=skipDelta(bytes, pos);
895        }
896        do {
897            ++pos;  // ignore a comparison byte
898            // handle its value
899            int node=bytes[pos++]&0xff;
900            boolean isFinal=(node&kValueIsFinal)!=0;
901            int value=readValue(bytes, pos, node>>1);
902            pos=skipValue(pos, node);
903            if(isFinal) {
904                if(uniqueValue!=0) {
905                    if(value!=(int)(uniqueValue>>1)) {
906                        return 0;
907                    }
908                } else {
909                    uniqueValue=((long)value<<1)|1;
910                }
911            } else {
912                uniqueValue=findUniqueValue(bytes, pos+value, uniqueValue);
913                if(uniqueValue==0) {
914                    return 0;
915                }
916            }
917        } while(--length>1);
918        // ignore the last comparison byte
919        return ((long)(pos+1)<<33)|(uniqueValue&0x1ffffffffL);
920    }
921    // Recursively finds a unique value (or whether there is not a unique one)
922    // starting from a position on a node lead byte.
923    // uniqueValue: If there is one, then bits 32..1 contain the value and bit 0 is set.
924    // Otherwise, uniqueValue is 0. Bits 63..33 are ignored.
925    private static long findUniqueValue(byte[] bytes, int pos, long uniqueValue) {
926        for(;;) {
927            int node=bytes[pos++]&0xff;
928            if(node<kMinLinearMatch) {
929                if(node==0) {
930                    node=bytes[pos++]&0xff;
931                }
932                uniqueValue=findUniqueValueFromBranch(bytes, pos, node+1, uniqueValue);
933                if(uniqueValue==0) {
934                    return 0;
935                }
936                pos=(int)(uniqueValue>>>33);
937            } else if(node<kMinValueLead) {
938                // linear-match node
939                pos+=node-kMinLinearMatch+1;  // Ignore the match bytes.
940            } else {
941                boolean isFinal=(node&kValueIsFinal)!=0;
942                int value=readValue(bytes, pos, node>>1);
943                if(uniqueValue!=0) {
944                    if(value!=(int)(uniqueValue>>1)) {
945                        return 0;
946                    }
947                } else {
948                    uniqueValue=((long)value<<1)|1;
949                }
950                if(isFinal) {
951                    return uniqueValue;
952                }
953                pos=skipValue(pos, node);
954            }
955        }
956    }
957
958    // Helper functions for getNextBytes().
959    // getNextBytes() when pos is on a branch node.
960    private static void getNextBranchBytes(byte[] bytes, int pos, int length, Appendable out) {
961        while(length>kMaxBranchLinearSubNodeLength) {
962            ++pos;  // ignore the comparison byte
963            getNextBranchBytes(bytes, jumpByDelta(bytes, pos), length>>1, out);
964            length=length-(length>>1);
965            pos=skipDelta(bytes, pos);
966        }
967        do {
968            append(out, bytes[pos++]&0xff);
969            pos=skipValue(bytes, pos);
970        } while(--length>1);
971        append(out, bytes[pos]&0xff);
972    }
973    private static void append(Appendable out, int c) {
974        try {
975            out.append((char)c);
976        } catch(IOException e) {
977            throw new ICUUncheckedIOException(e);
978        }
979    }
980
981    // BytesTrie data structure
982    //
983    // The trie consists of a series of byte-serialized nodes for incremental
984    // string/byte sequence matching. The root node is at the beginning of the trie data.
985    //
986    // Types of nodes are distinguished by their node lead byte ranges.
987    // After each node, except a final-value node, another node follows to
988    // encode match values or continue matching further bytes.
989    //
990    // Node types:
991    //  - Value node: Stores a 32-bit integer in a compact, variable-length format.
992    //    The value is for the string/byte sequence so far.
993    //    One node bit indicates whether the value is final or whether
994    //    matching continues with the next node.
995    //  - Linear-match node: Matches a number of bytes.
996    //  - Branch node: Branches to other nodes according to the current input byte.
997    //    The node byte is the length of the branch (number of bytes to select from)
998    //    minus 1. It is followed by a sub-node:
999    //    - If the length is at most kMaxBranchLinearSubNodeLength, then
1000    //      there are length-1 (key, value) pairs and then one more comparison byte.
1001    //      If one of the key bytes matches, then the value is either a final value for
1002    //      the string/byte sequence so far, or a "jump" delta to the next node.
1003    //      If the last byte matches, then matching continues with the next node.
1004    //      (Values have the same encoding as value nodes.)
1005    //    - If the length is greater than kMaxBranchLinearSubNodeLength, then
1006    //      there is one byte and one "jump" delta.
1007    //      If the input byte is less than the sub-node byte, then "jump" by delta to
1008    //      the next sub-node which will have a length of length/2.
1009    //      (The delta has its own compact encoding.)
1010    //      Otherwise, skip the "jump" delta to the next sub-node
1011    //      which will have a length of length-length/2.
1012
1013    // Node lead byte values.
1014
1015    // 00..0f: Branch node. If node!=0 then the length is node+1, otherwise
1016    // the length is one more than the next byte.
1017
1018    // For a branch sub-node with at most this many entries, we drop down
1019    // to a linear search.
1020    /*package*/ static final int kMaxBranchLinearSubNodeLength=5;
1021
1022    // 10..1f: Linear-match node, match 1..16 bytes and continue reading the next node.
1023    /*package*/ static final int kMinLinearMatch=0x10;
1024    /*package*/ static final int kMaxLinearMatchLength=0x10;
1025
1026    // 20..ff: Variable-length value node.
1027    // If odd, the value is final. (Otherwise, intermediate value or jump delta.)
1028    // Then shift-right by 1 bit.
1029    // The remaining lead byte value indicates the number of following bytes (0..4)
1030    // and contains the value's top bits.
1031    /*package*/ static final int kMinValueLead=kMinLinearMatch+kMaxLinearMatchLength;  // 0x20
1032    // It is a final value if bit 0 is set.
1033    private static final int kValueIsFinal=1;
1034
1035    // Compact value: After testing bit 0, shift right by 1 and then use the following thresholds.
1036    /*package*/ static final int kMinOneByteValueLead=kMinValueLead/2;  // 0x10
1037    /*package*/ static final int kMaxOneByteValue=0x40;  // At least 6 bits in the first byte.
1038
1039    /*package*/ static final int kMinTwoByteValueLead=kMinOneByteValueLead+kMaxOneByteValue+1;  // 0x51
1040    /*package*/ static final int kMaxTwoByteValue=0x1aff;
1041
1042    /*package*/ static final int kMinThreeByteValueLead=kMinTwoByteValueLead+(kMaxTwoByteValue>>8)+1;  // 0x6c
1043    /*package*/ static final int kFourByteValueLead=0x7e;
1044
1045    // A little more than Unicode code points. (0x11ffff)
1046    /*package*/ static final int kMaxThreeByteValue=((kFourByteValueLead-kMinThreeByteValueLead)<<16)-1;
1047
1048    /*package*/ static final int kFiveByteValueLead=0x7f;
1049
1050    // Compact delta integers.
1051    /*package*/ static final int kMaxOneByteDelta=0xbf;
1052    /*package*/ static final int kMinTwoByteDeltaLead=kMaxOneByteDelta+1;  // 0xc0
1053    /*package*/ static final int kMinThreeByteDeltaLead=0xf0;
1054    /*package*/ static final int kFourByteDeltaLead=0xfe;
1055    /*package*/ static final int kFiveByteDeltaLead=0xff;
1056
1057    /*package*/ static final int kMaxTwoByteDelta=((kMinThreeByteDeltaLead-kMinTwoByteDeltaLead)<<8)-1;  // 0x2fff
1058    /*package*/ static final int kMaxThreeByteDelta=((kFourByteDeltaLead-kMinThreeByteDeltaLead)<<16)-1;  // 0xdffff
1059
1060    // Fixed value referencing the BytesTrie bytes.
1061    private byte[] bytes_;
1062    private int root_;
1063
1064    // Iterator variables.
1065
1066    // Index of next trie byte to read. Negative if no more matches.
1067    private int pos_;
1068    // Remaining length of a linear-match node, minus 1. Negative if not in such a node.
1069    private int remainingMatchLength_;
1070};
1071