1// © 2016 and later: Unicode, Inc. and others.
2// License & terms of use: http://www.unicode.org/copyright.html#License
3/*
4*******************************************************************************
5*   Copyright (C) 2010-2014, International Business Machines
6*   Corporation and others.  All Rights Reserved.
7*******************************************************************************
8*   created on: 2010nov23
9*   created by: Markus W. Scherer
10*   ported from ICU4C bytestrie.h/.cpp
11*/
12package com.ibm.icu.util;
13
14import java.io.IOException;
15import java.nio.ByteBuffer;
16import java.util.ArrayList;
17import java.util.NoSuchElementException;
18
19/**
20 * Light-weight, non-const reader class for a BytesTrie.
21 * Traverses a byte-serialized data structure with minimal state,
22 * for mapping byte sequences to non-negative integer values.
23 *
24 * <p>This class is not intended for public subclassing.
25 *
26 * @stable ICU 4.8
27 * @author Markus W. Scherer
28 */
29public final class BytesTrie implements Cloneable, Iterable<BytesTrie.Entry> {
30    /**
31     * Constructs a BytesTrie reader instance.
32     *
33     * <p>The array must contain a copy of a byte sequence from the BytesTrieBuilder,
34     * with the offset indicating the first byte of that sequence.
35     * The BytesTrie object will not read more bytes than
36     * the BytesTrieBuilder generated in the corresponding build() call.
37     *
38     * <p>The array is not copied/cloned and must not be modified while
39     * the BytesTrie object is in use.
40     *
41     * @param trieBytes Bytes array that contains the serialized trie.
42     * @param offset Root offset of the trie in the array.
43     * @stable ICU 4.8
44     */
45    public BytesTrie(byte[] trieBytes, int offset) {
46        bytes_=trieBytes;
47        pos_=root_=offset;
48        remainingMatchLength_=-1;
49    }
50
51    /**
52     * Clones this trie reader object and its state,
53     * but not the byte array which will be shared.
54     * @return A shallow clone of this trie.
55     * @stable ICU 4.8
56     */
57    @Override
58    public Object clone() throws CloneNotSupportedException {
59        return super.clone();  // A shallow copy is just what we need.
60    }
61
62    /**
63     * Resets this trie to its initial state.
64     * @return this
65     * @stable ICU 4.8
66     */
67    public BytesTrie reset() {
68        pos_=root_;
69        remainingMatchLength_=-1;
70        return this;
71    }
72
73    /**
74     * BytesTrie state object, for saving a trie's current state
75     * and resetting the trie back to this state later.
76     * @stable ICU 4.8
77     */
78    public static final class State {
79        /**
80         * Constructs an empty State.
81         * @stable ICU 4.8
82         */
83        public State() {}
84        private byte[] bytes;
85        private int root;
86        private int pos;
87        private int remainingMatchLength;
88    }
89
90    /**
91     * Saves the state of this trie.
92     * @param state The State object to hold the trie's state.
93     * @return this
94     * @see #resetToState
95     * @stable ICU 4.8
96     */
97    public BytesTrie saveState(State state) /*const*/ {
98        state.bytes=bytes_;
99        state.root=root_;
100        state.pos=pos_;
101        state.remainingMatchLength=remainingMatchLength_;
102        return this;
103    }
104
105    /**
106     * Resets this trie to the saved state.
107     * @param state The State object which holds a saved trie state.
108     * @return this
109     * @throws IllegalArgumentException if the state object contains no state,
110     *         or the state of a different trie
111     * @see #saveState
112     * @see #reset
113     * @stable ICU 4.8
114     */
115    public BytesTrie resetToState(State state) {
116        if(bytes_==state.bytes && bytes_!=null && root_==state.root) {
117            pos_=state.pos;
118            remainingMatchLength_=state.remainingMatchLength;
119        } else {
120            throw new IllegalArgumentException("incompatible trie state");
121        }
122        return this;
123    }
124
125    /**
126     * Return values for BytesTrie.next(), CharsTrie.next() and similar methods.
127     * @stable ICU 4.8
128     */
129    public enum Result {
130        /**
131         * The input unit(s) did not continue a matching string.
132         * Once current()/next() return NO_MATCH,
133         * all further calls to current()/next() will also return NO_MATCH,
134         * until the trie is reset to its original state or to a saved state.
135         * @stable ICU 4.8
136         */
137        NO_MATCH,
138        /**
139         * The input unit(s) continued a matching string
140         * but there is no value for the string so far.
141         * (It is a prefix of a longer string.)
142         * @stable ICU 4.8
143         */
144        NO_VALUE,
145        /**
146         * The input unit(s) continued a matching string
147         * and there is a value for the string so far.
148         * This value will be returned by getValue().
149         * No further input byte/unit can continue a matching string.
150         * @stable ICU 4.8
151         */
152        FINAL_VALUE,
153        /**
154         * The input unit(s) continued a matching string
155         * and there is a value for the string so far.
156         * This value will be returned by getValue().
157         * Another input byte/unit can continue a matching string.
158         * @stable ICU 4.8
159         */
160        INTERMEDIATE_VALUE;
161
162        // Note: The following methods assume the particular order
163        // of enum constants, treating the ordinal() values like bit sets.
164        // Do not reorder the enum constants!
165
166        /**
167         * Same as (result!=NO_MATCH).
168         * @return true if the input bytes/units so far are part of a matching string/byte sequence.
169         * @stable ICU 4.8
170         */
171        public boolean matches() { return this!=NO_MATCH; }
172
173        /**
174         * Equivalent to (result==INTERMEDIATE_VALUE || result==FINAL_VALUE).
175         * @return true if there is a value for the input bytes/units so far.
176         * @see #getValue
177         * @stable ICU 4.8
178         */
179        public boolean hasValue() { return ordinal()>=2; }
180
181        /**
182         * Equivalent to (result==NO_VALUE || result==INTERMEDIATE_VALUE).
183         * @return true if another input byte/unit can continue a matching string.
184         * @stable ICU 4.8
185         */
186        public boolean hasNext() { return (ordinal()&1)!=0; }
187    }
188
189    /**
190     * Determines whether the byte sequence so far matches, whether it has a value,
191     * and whether another input byte can continue a matching byte sequence.
192     * @return The match/value Result.
193     * @stable ICU 4.8
194     */
195    public Result current() /*const*/ {
196        int pos=pos_;
197        if(pos<0) {
198            return Result.NO_MATCH;
199        } else {
200            int node;
201            return (remainingMatchLength_<0 && (node=bytes_[pos]&0xff)>=kMinValueLead) ?
202                    valueResults_[node&kValueIsFinal] : Result.NO_VALUE;
203        }
204    }
205
206    /**
207     * Traverses the trie from the initial state for this input byte.
208     * Equivalent to reset().next(inByte).
209     * @param inByte Input byte value. Values -0x100..-1 are treated like 0..0xff.
210     *               Values below -0x100 and above 0xff will never match.
211     * @return The match/value Result.
212     * @stable ICU 4.8
213     */
214    public Result first(int inByte) {
215        remainingMatchLength_=-1;
216        if(inByte<0) {
217            inByte+=0x100;
218        }
219        return nextImpl(root_, inByte);
220    }
221
222    /**
223     * Traverses the trie from the current state for this input byte.
224     * @param inByte Input byte value. Values -0x100..-1 are treated like 0..0xff.
225     *               Values below -0x100 and above 0xff will never match.
226     * @return The match/value Result.
227     * @stable ICU 4.8
228     */
229    public Result next(int inByte) {
230        int pos=pos_;
231        if(pos<0) {
232            return Result.NO_MATCH;
233        }
234        if(inByte<0) {
235            inByte+=0x100;
236        }
237        int length=remainingMatchLength_;  // Actual remaining match length minus 1.
238        if(length>=0) {
239            // Remaining part of a linear-match node.
240            if(inByte==(bytes_[pos++]&0xff)) {
241                remainingMatchLength_=--length;
242                pos_=pos;
243                int node;
244                return (length<0 && (node=bytes_[pos]&0xff)>=kMinValueLead) ?
245                        valueResults_[node&kValueIsFinal] : Result.NO_VALUE;
246            } else {
247                stop();
248                return Result.NO_MATCH;
249            }
250        }
251        return nextImpl(pos, inByte);
252    }
253
254    /**
255     * Traverses the trie from the current state for this byte sequence.
256     * Equivalent to
257     * <pre>
258     * Result result=current();
259     * for(each c in s)
260     *   if(!result.hasNext()) return Result.NO_MATCH;
261     *   result=next(c);
262     * return result;
263     * </pre>
264     * @param s Contains a string or byte sequence.
265     * @param sIndex The start index of the byte sequence in s.
266     * @param sLimit The (exclusive) end index of the byte sequence in s.
267     * @return The match/value Result.
268     * @stable ICU 4.8
269     */
270    public Result next(byte[] s, int sIndex, int sLimit) {
271        if(sIndex>=sLimit) {
272            // Empty input.
273            return current();
274        }
275        int pos=pos_;
276        if(pos<0) {
277            return Result.NO_MATCH;
278        }
279        int length=remainingMatchLength_;  // Actual remaining match length minus 1.
280        for(;;) {
281            // Fetch the next input byte, if there is one.
282            // Continue a linear-match node.
283            byte inByte;
284            for(;;) {
285                if(sIndex==sLimit) {
286                    remainingMatchLength_=length;
287                    pos_=pos;
288                    int node;
289                    return (length<0 && (node=(bytes_[pos]&0xff))>=kMinValueLead) ?
290                            valueResults_[node&kValueIsFinal] : Result.NO_VALUE;
291                }
292                inByte=s[sIndex++];
293                if(length<0) {
294                    remainingMatchLength_=length;
295                    break;
296                }
297                if(inByte!=bytes_[pos]) {
298                    stop();
299                    return Result.NO_MATCH;
300                }
301                ++pos;
302                --length;
303            }
304            for(;;) {
305                int node=bytes_[pos++]&0xff;
306                if(node<kMinLinearMatch) {
307                    Result result=branchNext(pos, node, inByte&0xff);
308                    if(result==Result.NO_MATCH) {
309                        return Result.NO_MATCH;
310                    }
311                    // Fetch the next input byte, if there is one.
312                    if(sIndex==sLimit) {
313                        return result;
314                    }
315                    if(result==Result.FINAL_VALUE) {
316                        // No further matching bytes.
317                        stop();
318                        return Result.NO_MATCH;
319                    }
320                    inByte=s[sIndex++];
321                    pos=pos_;  // branchNext() advanced pos and wrote it to pos_ .
322                } else if(node<kMinValueLead) {
323                    // Match length+1 bytes.
324                    length=node-kMinLinearMatch;  // Actual match length minus 1.
325                    if(inByte!=bytes_[pos]) {
326                        stop();
327                        return Result.NO_MATCH;
328                    }
329                    ++pos;
330                    --length;
331                    break;
332                } else if((node&kValueIsFinal)!=0) {
333                    // No further matching bytes.
334                    stop();
335                    return Result.NO_MATCH;
336                } else {
337                    // Skip intermediate value.
338                    pos=skipValue(pos, node);
339                    // The next node must not also be a value node.
340                    assert((bytes_[pos]&0xff)<kMinValueLead);
341                }
342            }
343        }
344    }
345
346    /**
347     * Returns a matching byte sequence's value if called immediately after
348     * current()/first()/next() returned Result.INTERMEDIATE_VALUE or Result.FINAL_VALUE.
349     * getValue() can be called multiple times.
350     *
351     * Do not call getValue() after Result.NO_MATCH or Result.NO_VALUE!
352     * @return The value for the byte sequence so far.
353     * @stable ICU 4.8
354     */
355    public int getValue() /*const*/ {
356        int pos=pos_;
357        int leadByte=bytes_[pos++]&0xff;
358        assert(leadByte>=kMinValueLead);
359        return readValue(bytes_, pos, leadByte>>1);
360    }
361
362    /**
363     * Determines whether all byte sequences reachable from the current state
364     * map to the same value, and if so, returns that value.
365     * @return The unique value in bits 32..1 with bit 0 set,
366     *         if all byte sequences reachable from the current state
367     *         map to the same value; otherwise returns 0.
368     * @stable ICU 4.8
369     */
370    public long getUniqueValue() /*const*/ {
371        int pos=pos_;
372        if(pos<0) {
373            return 0;
374        }
375        // Skip the rest of a pending linear-match node.
376        long uniqueValue=findUniqueValue(bytes_, pos+remainingMatchLength_+1, 0);
377        // Ignore internally used bits 63..33; extend the actual value's sign bit from bit 32.
378        return (uniqueValue<<31)>>31;
379    }
380
381    /**
382     * Finds each byte which continues the byte sequence from the current state.
383     * That is, each byte b for which it would be next(b)!=Result.NO_MATCH now.
384     * @param out Each next byte is 0-extended to a char and appended to this object.
385     *            (Only uses the out.append(c) method.)
386     * @return The number of bytes which continue the byte sequence from here.
387     * @stable ICU 4.8
388     */
389    public int getNextBytes(Appendable out) /*const*/ {
390        int pos=pos_;
391        if(pos<0) {
392            return 0;
393        }
394        if(remainingMatchLength_>=0) {
395            append(out, bytes_[pos]&0xff);  // Next byte of a pending linear-match node.
396            return 1;
397        }
398        int node=bytes_[pos++]&0xff;
399        if(node>=kMinValueLead) {
400            if((node&kValueIsFinal)!=0) {
401                return 0;
402            } else {
403                pos=skipValue(pos, node);
404                node=bytes_[pos++]&0xff;
405                assert(node<kMinValueLead);
406            }
407        }
408        if(node<kMinLinearMatch) {
409            if(node==0) {
410                node=bytes_[pos++]&0xff;
411            }
412            getNextBranchBytes(bytes_, pos, ++node, out);
413            return node;
414        } else {
415            // First byte of the linear-match node.
416            append(out, bytes_[pos]&0xff);
417            return 1;
418        }
419    }
420
421    /**
422     * Iterates from the current state of this trie.
423     * @return A new BytesTrie.Iterator.
424     * @stable ICU 4.8
425     */
426    @Override
427    public Iterator iterator() {
428        return new Iterator(bytes_, pos_, remainingMatchLength_, 0);
429    }
430
431    /**
432     * Iterates from the current state of this trie.
433     * @param maxStringLength If 0, the iterator returns full strings/byte sequences.
434     *                        Otherwise, the iterator returns strings with this maximum length.
435     * @return A new BytesTrie.Iterator.
436     * @stable ICU 4.8
437     */
438    public Iterator iterator(int maxStringLength) {
439        return new Iterator(bytes_, pos_, remainingMatchLength_, maxStringLength);
440    }
441
442    /**
443     * Iterates from the root of a byte-serialized BytesTrie.
444     * @param trieBytes Bytes array that contains the serialized trie.
445     * @param offset Root offset of the trie in the array.
446     * @param maxStringLength If 0, the iterator returns full strings/byte sequences.
447     *                        Otherwise, the iterator returns strings with this maximum length.
448     * @return A new BytesTrie.Iterator.
449     * @stable ICU 4.8
450     */
451    public static Iterator iterator(byte[] trieBytes, int offset, int maxStringLength) {
452        return new Iterator(trieBytes, offset, -1, maxStringLength);
453    }
454
455    /**
456     * Return value type for the Iterator.
457     * @stable ICU 4.8
458     */
459    public static final class Entry {
460        private Entry(int capacity) {
461            bytes=new byte[capacity];
462        }
463
464        /**
465         * @return The length of the byte sequence.
466         * @stable ICU 4.8
467         */
468        public int bytesLength() { return length; }
469        /**
470         * Returns a byte of the byte sequence.
471         * @param index An index into the byte sequence.
472         * @return The index-th byte sequence byte.
473         * @stable ICU 4.8
474         */
475        public byte byteAt(int index) { return bytes[index]; }
476        /**
477         * Copies the byte sequence into a byte array.
478         * @param dest Destination byte array.
479         * @param destOffset Starting offset to where in dest the byte sequence is copied.
480         * @stable ICU 4.8
481         */
482        public void copyBytesTo(byte[] dest, int destOffset) {
483            System.arraycopy(bytes, 0, dest, destOffset, length);
484        }
485        /**
486         * @return The byte sequence as a read-only ByteBuffer.
487         * @stable ICU 4.8
488         */
489        public ByteBuffer bytesAsByteBuffer() {
490            return ByteBuffer.wrap(bytes, 0, length).asReadOnlyBuffer();
491        }
492
493        /**
494         * The value associated with the byte sequence.
495         * @stable ICU 4.8
496         */
497        public int value;
498
499        private void ensureCapacity(int len) {
500            if(bytes.length<len) {
501                byte[] newBytes=new byte[Math.min(2*bytes.length, 2*len)];
502                System.arraycopy(bytes, 0, newBytes, 0, length);
503                bytes=newBytes;
504            }
505        }
506        private void append(byte b) {
507            ensureCapacity(length+1);
508            bytes[length++]=b;
509        }
510        private void append(byte[] b, int off, int len) {
511            ensureCapacity(length+len);
512            System.arraycopy(b, off, bytes, length, len);
513            length+=len;
514        }
515        private void truncateString(int newLength) { length=newLength; }
516
517        private byte[] bytes;
518        private int length;
519    }
520
521    /**
522     * Iterator for all of the (byte sequence, value) pairs in a BytesTrie.
523     * @stable ICU 4.8
524     */
525    public static final class Iterator implements java.util.Iterator<Entry> {
526        private Iterator(byte[] trieBytes, int offset, int remainingMatchLength, int maxStringLength) {
527            bytes_=trieBytes;
528            pos_=initialPos_=offset;
529            remainingMatchLength_=initialRemainingMatchLength_=remainingMatchLength;
530            maxLength_=maxStringLength;
531            entry_=new Entry(maxLength_!=0 ? maxLength_ : 32);
532            int length=remainingMatchLength_;  // Actual remaining match length minus 1.
533            if(length>=0) {
534                // Pending linear-match node, append remaining bytes to entry_.
535                ++length;
536                if(maxLength_>0 && length>maxLength_) {
537                    length=maxLength_;  // This will leave remainingMatchLength>=0 as a signal.
538                }
539                entry_.append(bytes_, pos_, length);
540                pos_+=length;
541                remainingMatchLength_-=length;
542            }
543        }
544
545        /**
546         * Resets this iterator to its initial state.
547         * @return this
548         * @stable ICU 4.8
549         */
550        public Iterator reset() {
551            pos_=initialPos_;
552            remainingMatchLength_=initialRemainingMatchLength_;
553            int length=remainingMatchLength_+1;  // Remaining match length.
554            if(maxLength_>0 && length>maxLength_) {
555                length=maxLength_;
556            }
557            entry_.truncateString(length);
558            pos_+=length;
559            remainingMatchLength_-=length;
560            stack_.clear();
561            return this;
562        }
563
564        /**
565         * @return true if there are more elements.
566         * @stable ICU 4.8
567         */
568        @Override
569        public boolean hasNext() /*const*/ { return pos_>=0 || !stack_.isEmpty(); }
570
571        /**
572         * Finds the next (byte sequence, value) pair if there is one.
573         *
574         * If the byte sequence is truncated to the maximum length and does not
575         * have a real value, then the value is set to -1.
576         * In this case, this "not a real value" is indistinguishable from
577         * a real value of -1.
578         * @return An Entry with the string and value of the next element.
579         * @throws NoSuchElementException - iteration has no more elements.
580         * @stable ICU 4.8
581         */
582        @Override
583        public Entry next() {
584            int pos=pos_;
585            if(pos<0) {
586                if(stack_.isEmpty()) {
587                    throw new NoSuchElementException();
588                }
589                // Pop the state off the stack and continue with the next outbound edge of
590                // the branch node.
591                long top=stack_.remove(stack_.size()-1);
592                int length=(int)top;
593                pos=(int)(top>>32);
594                entry_.truncateString(length&0xffff);
595                length>>>=16;
596                if(length>1) {
597                    pos=branchNext(pos, length);
598                    if(pos<0) {
599                        return entry_;  // Reached a final value.
600                    }
601                } else {
602                    entry_.append(bytes_[pos++]);
603                }
604            }
605            if(remainingMatchLength_>=0) {
606                // We only get here if we started in a pending linear-match node
607                // with more than maxLength remaining bytes.
608                return truncateAndStop();
609            }
610            for(;;) {
611                int node=bytes_[pos++]&0xff;
612                if(node>=kMinValueLead) {
613                    // Deliver value for the byte sequence so far.
614                    boolean isFinal=(node&kValueIsFinal)!=0;
615                    entry_.value=readValue(bytes_, pos, node>>1);
616                    if(isFinal || (maxLength_>0 && entry_.length==maxLength_)) {
617                        pos_=-1;
618                    } else {
619                        pos_=skipValue(pos, node);
620                    }
621                    return entry_;
622                }
623                if(maxLength_>0 && entry_.length==maxLength_) {
624                    return truncateAndStop();
625                }
626                if(node<kMinLinearMatch) {
627                    if(node==0) {
628                        node=bytes_[pos++]&0xff;
629                    }
630                    pos=branchNext(pos, node+1);
631                    if(pos<0) {
632                        return entry_;  // Reached a final value.
633                    }
634                } else {
635                    // Linear-match node, append length bytes to entry_.
636                    int length=node-kMinLinearMatch+1;
637                    if(maxLength_>0 && entry_.length+length>maxLength_) {
638                        entry_.append(bytes_, pos, maxLength_-entry_.length);
639                        return truncateAndStop();
640                    }
641                    entry_.append(bytes_, pos, length);
642                    pos+=length;
643                }
644            }
645        }
646
647        /**
648         * Iterator.remove() is not supported.
649         * @throws UnsupportedOperationException (always)
650         * @stable ICU 4.8
651         */
652        @Override
653        public void remove() {
654            throw new UnsupportedOperationException();
655        }
656
657        private Entry truncateAndStop() {
658            pos_=-1;
659            entry_.value=-1;  // no real value for str
660            return entry_;
661        }
662
663        private int branchNext(int pos, int length) {
664            while(length>kMaxBranchLinearSubNodeLength) {
665                ++pos;  // ignore the comparison byte
666                // Push state for the greater-or-equal edge.
667                stack_.add(((long)skipDelta(bytes_, pos)<<32)|((length-(length>>1))<<16)|entry_.length);
668                // Follow the less-than edge.
669                length>>=1;
670                pos=jumpByDelta(bytes_, pos);
671            }
672            // List of key-value pairs where values are either final values or jump deltas.
673            // Read the first (key, value) pair.
674            byte trieByte=bytes_[pos++];
675            int node=bytes_[pos++]&0xff;
676            boolean isFinal=(node&kValueIsFinal)!=0;
677            int value=readValue(bytes_, pos, node>>1);
678            pos=skipValue(pos, node);
679            stack_.add(((long)pos<<32)|((length-1)<<16)|entry_.length);
680            entry_.append(trieByte);
681            if(isFinal) {
682                pos_=-1;
683                entry_.value=value;
684                return -1;
685            } else {
686                return pos+value;
687            }
688        }
689
690        private byte[] bytes_;
691        private int pos_;
692        private int initialPos_;
693        private int remainingMatchLength_;
694        private int initialRemainingMatchLength_;
695
696        private int maxLength_;
697        private Entry entry_;
698
699        // The stack stores longs for backtracking to another
700        // outbound edge of a branch node.
701        // Each long has the offset from bytes_ in bits 62..32,
702        // the entry_.stringLength() from before the node in bits 15..0,
703        // and the remaining branch length in bits 24..16. (Bits 31..25 are unused.)
704        // (We could store the remaining branch length minus 1 in bits 23..16 and not use bits 31..24,
705        // but the code looks more confusing that way.)
706        private ArrayList<Long> stack_=new ArrayList<Long>();
707    }
708
709    private void stop() {
710        pos_=-1;
711    }
712
713    // Reads a compact 32-bit integer.
714    // pos is already after the leadByte, and the lead byte is already shifted right by 1.
715    private static int readValue(byte[] bytes, int pos, int leadByte) {
716        int value;
717        if(leadByte<kMinTwoByteValueLead) {
718            value=leadByte-kMinOneByteValueLead;
719        } else if(leadByte<kMinThreeByteValueLead) {
720            value=((leadByte-kMinTwoByteValueLead)<<8)|(bytes[pos]&0xff);
721        } else if(leadByte<kFourByteValueLead) {
722            value=((leadByte-kMinThreeByteValueLead)<<16)|((bytes[pos]&0xff)<<8)|(bytes[pos+1]&0xff);
723        } else if(leadByte==kFourByteValueLead) {
724            value=((bytes[pos]&0xff)<<16)|((bytes[pos+1]&0xff)<<8)|(bytes[pos+2]&0xff);
725        } else {
726            value=(bytes[pos]<<24)|((bytes[pos+1]&0xff)<<16)|((bytes[pos+2]&0xff)<<8)|(bytes[pos+3]&0xff);
727        }
728        return value;
729    }
730    private static int skipValue(int pos, int leadByte) {
731        assert(leadByte>=kMinValueLead);
732        if(leadByte>=(kMinTwoByteValueLead<<1)) {
733            if(leadByte<(kMinThreeByteValueLead<<1)) {
734                ++pos;
735            } else if(leadByte<(kFourByteValueLead<<1)) {
736                pos+=2;
737            } else {
738                pos+=3+((leadByte>>1)&1);
739            }
740        }
741        return pos;
742    }
743    private static int skipValue(byte[] bytes, int pos) {
744        int leadByte=bytes[pos++]&0xff;
745        return skipValue(pos, leadByte);
746    }
747
748    // Reads a jump delta and jumps.
749    private static int jumpByDelta(byte[] bytes, int pos) {
750        int delta=bytes[pos++]&0xff;
751        if(delta<kMinTwoByteDeltaLead) {
752            // nothing to do
753        } else if(delta<kMinThreeByteDeltaLead) {
754            delta=((delta-kMinTwoByteDeltaLead)<<8)|(bytes[pos++]&0xff);
755        } else if(delta<kFourByteDeltaLead) {
756            delta=((delta-kMinThreeByteDeltaLead)<<16)|((bytes[pos]&0xff)<<8)|(bytes[pos+1]&0xff);
757            pos+=2;
758        } else if(delta==kFourByteDeltaLead) {
759            delta=((bytes[pos]&0xff)<<16)|((bytes[pos+1]&0xff)<<8)|(bytes[pos+2]&0xff);
760            pos+=3;
761        } else {
762            delta=(bytes[pos]<<24)|((bytes[pos+1]&0xff)<<16)|((bytes[pos+2]&0xff)<<8)|(bytes[pos+3]&0xff);
763            pos+=4;
764        }
765        return pos+delta;
766    }
767
768    private static int skipDelta(byte[] bytes, int pos) {
769        int delta=bytes[pos++]&0xff;
770        if(delta>=kMinTwoByteDeltaLead) {
771            if(delta<kMinThreeByteDeltaLead) {
772                ++pos;
773            } else if(delta<kFourByteDeltaLead) {
774                pos+=2;
775            } else {
776                pos+=3+(delta&1);
777            }
778        }
779        return pos;
780    }
781
782    private static Result[] valueResults_={ Result.INTERMEDIATE_VALUE, Result.FINAL_VALUE };
783
784    // Handles a branch node for both next(byte) and next(string).
785    private Result branchNext(int pos, int length, int inByte) {
786        // Branch according to the current byte.
787        if(length==0) {
788            length=bytes_[pos++]&0xff;
789        }
790        ++length;
791        // The length of the branch is the number of bytes to select from.
792        // The data structure encodes a binary search.
793        while(length>kMaxBranchLinearSubNodeLength) {
794            if(inByte<(bytes_[pos++]&0xff)) {
795                length>>=1;
796                pos=jumpByDelta(bytes_, pos);
797            } else {
798                length=length-(length>>1);
799                pos=skipDelta(bytes_, pos);
800            }
801        }
802        // Drop down to linear search for the last few bytes.
803        // length>=2 because the loop body above sees length>kMaxBranchLinearSubNodeLength>=3
804        // and divides length by 2.
805        do {
806            if(inByte==(bytes_[pos++]&0xff)) {
807                Result result;
808                int node=bytes_[pos]&0xff;
809                assert(node>=kMinValueLead);
810                if((node&kValueIsFinal)!=0) {
811                    // Leave the final value for getValue() to read.
812                    result=Result.FINAL_VALUE;
813                } else {
814                    // Use the non-final value as the jump delta.
815                    ++pos;
816                    // int delta=readValue(pos, node>>1);
817                    node>>=1;
818                    int delta;
819                    if(node<kMinTwoByteValueLead) {
820                        delta=node-kMinOneByteValueLead;
821                    } else if(node<kMinThreeByteValueLead) {
822                        delta=((node-kMinTwoByteValueLead)<<8)|(bytes_[pos++]&0xff);
823                    } else if(node<kFourByteValueLead) {
824                        delta=((node-kMinThreeByteValueLead)<<16)|((bytes_[pos]&0xff)<<8)|(bytes_[pos+1]&0xff);
825                        pos+=2;
826                    } else if(node==kFourByteValueLead) {
827                        delta=((bytes_[pos]&0xff)<<16)|((bytes_[pos+1]&0xff)<<8)|(bytes_[pos+2]&0xff);
828                        pos+=3;
829                    } else {
830                        delta=(bytes_[pos]<<24)|((bytes_[pos+1]&0xff)<<16)|((bytes_[pos+2]&0xff)<<8)|(bytes_[pos+3]&0xff);
831                        pos+=4;
832                    }
833                    // end readValue()
834                    pos+=delta;
835                    node=bytes_[pos]&0xff;
836                    result= node>=kMinValueLead ? valueResults_[node&kValueIsFinal] : Result.NO_VALUE;
837                }
838                pos_=pos;
839                return result;
840            }
841            --length;
842            pos=skipValue(bytes_, pos);
843        } while(length>1);
844        if(inByte==(bytes_[pos++]&0xff)) {
845            pos_=pos;
846            int node=bytes_[pos]&0xff;
847            return node>=kMinValueLead ? valueResults_[node&kValueIsFinal] : Result.NO_VALUE;
848        } else {
849            stop();
850            return Result.NO_MATCH;
851        }
852    }
853
854    // Requires remainingLength_<0.
855    private Result nextImpl(int pos, int inByte) {
856        for(;;) {
857            int node=bytes_[pos++]&0xff;
858            if(node<kMinLinearMatch) {
859                return branchNext(pos, node, inByte);
860            } else if(node<kMinValueLead) {
861                // Match the first of length+1 bytes.
862                int length=node-kMinLinearMatch;  // Actual match length minus 1.
863                if(inByte==(bytes_[pos++]&0xff)) {
864                    remainingMatchLength_=--length;
865                    pos_=pos;
866                    return (length<0 && (node=bytes_[pos]&0xff)>=kMinValueLead) ?
867                            valueResults_[node&kValueIsFinal] : Result.NO_VALUE;
868                } else {
869                    // No match.
870                    break;
871                }
872            } else if((node&kValueIsFinal)!=0) {
873                // No further matching bytes.
874                break;
875            } else {
876                // Skip intermediate value.
877                pos=skipValue(pos, node);
878                // The next node must not also be a value node.
879                assert((bytes_[pos]&0xff)<kMinValueLead);
880            }
881        }
882        stop();
883        return Result.NO_MATCH;
884    }
885
886    // Helper functions for getUniqueValue().
887    // Recursively finds a unique value (or whether there is not a unique one)
888    // from a branch.
889    // uniqueValue: On input, same as for getUniqueValue()/findUniqueValue().
890    // On return, if not 0, then bits 63..33 contain the updated non-negative pos.
891    private static long findUniqueValueFromBranch(byte[] bytes, int pos, int length,
892                                                  long uniqueValue) {
893        while(length>kMaxBranchLinearSubNodeLength) {
894            ++pos;  // ignore the comparison byte
895            uniqueValue=findUniqueValueFromBranch(bytes, jumpByDelta(bytes, pos), length>>1, uniqueValue);
896            if(uniqueValue==0) {
897                return 0;
898            }
899            length=length-(length>>1);
900            pos=skipDelta(bytes, pos);
901        }
902        do {
903            ++pos;  // ignore a comparison byte
904            // handle its value
905            int node=bytes[pos++]&0xff;
906            boolean isFinal=(node&kValueIsFinal)!=0;
907            int value=readValue(bytes, pos, node>>1);
908            pos=skipValue(pos, node);
909            if(isFinal) {
910                if(uniqueValue!=0) {
911                    if(value!=(int)(uniqueValue>>1)) {
912                        return 0;
913                    }
914                } else {
915                    uniqueValue=((long)value<<1)|1;
916                }
917            } else {
918                uniqueValue=findUniqueValue(bytes, pos+value, uniqueValue);
919                if(uniqueValue==0) {
920                    return 0;
921                }
922            }
923        } while(--length>1);
924        // ignore the last comparison byte
925        return ((long)(pos+1)<<33)|(uniqueValue&0x1ffffffffL);
926    }
927    // Recursively finds a unique value (or whether there is not a unique one)
928    // starting from a position on a node lead byte.
929    // uniqueValue: If there is one, then bits 32..1 contain the value and bit 0 is set.
930    // Otherwise, uniqueValue is 0. Bits 63..33 are ignored.
931    private static long findUniqueValue(byte[] bytes, int pos, long uniqueValue) {
932        for(;;) {
933            int node=bytes[pos++]&0xff;
934            if(node<kMinLinearMatch) {
935                if(node==0) {
936                    node=bytes[pos++]&0xff;
937                }
938                uniqueValue=findUniqueValueFromBranch(bytes, pos, node+1, uniqueValue);
939                if(uniqueValue==0) {
940                    return 0;
941                }
942                pos=(int)(uniqueValue>>>33);
943            } else if(node<kMinValueLead) {
944                // linear-match node
945                pos+=node-kMinLinearMatch+1;  // Ignore the match bytes.
946            } else {
947                boolean isFinal=(node&kValueIsFinal)!=0;
948                int value=readValue(bytes, pos, node>>1);
949                if(uniqueValue!=0) {
950                    if(value!=(int)(uniqueValue>>1)) {
951                        return 0;
952                    }
953                } else {
954                    uniqueValue=((long)value<<1)|1;
955                }
956                if(isFinal) {
957                    return uniqueValue;
958                }
959                pos=skipValue(pos, node);
960            }
961        }
962    }
963
964    // Helper functions for getNextBytes().
965    // getNextBytes() when pos is on a branch node.
966    private static void getNextBranchBytes(byte[] bytes, int pos, int length, Appendable out) {
967        while(length>kMaxBranchLinearSubNodeLength) {
968            ++pos;  // ignore the comparison byte
969            getNextBranchBytes(bytes, jumpByDelta(bytes, pos), length>>1, out);
970            length=length-(length>>1);
971            pos=skipDelta(bytes, pos);
972        }
973        do {
974            append(out, bytes[pos++]&0xff);
975            pos=skipValue(bytes, pos);
976        } while(--length>1);
977        append(out, bytes[pos]&0xff);
978    }
979    private static void append(Appendable out, int c) {
980        try {
981            out.append((char)c);
982        } catch(IOException e) {
983            throw new ICUUncheckedIOException(e);
984        }
985    }
986
987    // BytesTrie data structure
988    //
989    // The trie consists of a series of byte-serialized nodes for incremental
990    // string/byte sequence matching. The root node is at the beginning of the trie data.
991    //
992    // Types of nodes are distinguished by their node lead byte ranges.
993    // After each node, except a final-value node, another node follows to
994    // encode match values or continue matching further bytes.
995    //
996    // Node types:
997    //  - Value node: Stores a 32-bit integer in a compact, variable-length format.
998    //    The value is for the string/byte sequence so far.
999    //    One node bit indicates whether the value is final or whether
1000    //    matching continues with the next node.
1001    //  - Linear-match node: Matches a number of bytes.
1002    //  - Branch node: Branches to other nodes according to the current input byte.
1003    //    The node byte is the length of the branch (number of bytes to select from)
1004    //    minus 1. It is followed by a sub-node:
1005    //    - If the length is at most kMaxBranchLinearSubNodeLength, then
1006    //      there are length-1 (key, value) pairs and then one more comparison byte.
1007    //      If one of the key bytes matches, then the value is either a final value for
1008    //      the string/byte sequence so far, or a "jump" delta to the next node.
1009    //      If the last byte matches, then matching continues with the next node.
1010    //      (Values have the same encoding as value nodes.)
1011    //    - If the length is greater than kMaxBranchLinearSubNodeLength, then
1012    //      there is one byte and one "jump" delta.
1013    //      If the input byte is less than the sub-node byte, then "jump" by delta to
1014    //      the next sub-node which will have a length of length/2.
1015    //      (The delta has its own compact encoding.)
1016    //      Otherwise, skip the "jump" delta to the next sub-node
1017    //      which will have a length of length-length/2.
1018
1019    // Node lead byte values.
1020
1021    // 00..0f: Branch node. If node!=0 then the length is node+1, otherwise
1022    // the length is one more than the next byte.
1023
1024    // For a branch sub-node with at most this many entries, we drop down
1025    // to a linear search.
1026    /*package*/ static final int kMaxBranchLinearSubNodeLength=5;
1027
1028    // 10..1f: Linear-match node, match 1..16 bytes and continue reading the next node.
1029    /*package*/ static final int kMinLinearMatch=0x10;
1030    /*package*/ static final int kMaxLinearMatchLength=0x10;
1031
1032    // 20..ff: Variable-length value node.
1033    // If odd, the value is final. (Otherwise, intermediate value or jump delta.)
1034    // Then shift-right by 1 bit.
1035    // The remaining lead byte value indicates the number of following bytes (0..4)
1036    // and contains the value's top bits.
1037    /*package*/ static final int kMinValueLead=kMinLinearMatch+kMaxLinearMatchLength;  // 0x20
1038    // It is a final value if bit 0 is set.
1039    private static final int kValueIsFinal=1;
1040
1041    // Compact value: After testing bit 0, shift right by 1 and then use the following thresholds.
1042    /*package*/ static final int kMinOneByteValueLead=kMinValueLead/2;  // 0x10
1043    /*package*/ static final int kMaxOneByteValue=0x40;  // At least 6 bits in the first byte.
1044
1045    /*package*/ static final int kMinTwoByteValueLead=kMinOneByteValueLead+kMaxOneByteValue+1;  // 0x51
1046    /*package*/ static final int kMaxTwoByteValue=0x1aff;
1047
1048    /*package*/ static final int kMinThreeByteValueLead=kMinTwoByteValueLead+(kMaxTwoByteValue>>8)+1;  // 0x6c
1049    /*package*/ static final int kFourByteValueLead=0x7e;
1050
1051    // A little more than Unicode code points. (0x11ffff)
1052    /*package*/ static final int kMaxThreeByteValue=((kFourByteValueLead-kMinThreeByteValueLead)<<16)-1;
1053
1054    /*package*/ static final int kFiveByteValueLead=0x7f;
1055
1056    // Compact delta integers.
1057    /*package*/ static final int kMaxOneByteDelta=0xbf;
1058    /*package*/ static final int kMinTwoByteDeltaLead=kMaxOneByteDelta+1;  // 0xc0
1059    /*package*/ static final int kMinThreeByteDeltaLead=0xf0;
1060    /*package*/ static final int kFourByteDeltaLead=0xfe;
1061    /*package*/ static final int kFiveByteDeltaLead=0xff;
1062
1063    /*package*/ static final int kMaxTwoByteDelta=((kMinThreeByteDeltaLead-kMinTwoByteDeltaLead)<<8)-1;  // 0x2fff
1064    /*package*/ static final int kMaxThreeByteDelta=((kFourByteDeltaLead-kMinThreeByteDeltaLead)<<16)-1;  // 0xdffff
1065
1066    // Fixed value referencing the BytesTrie bytes.
1067    private byte[] bytes_;
1068    private int root_;
1069
1070    // Iterator variables.
1071
1072    // Index of next trie byte to read. Negative if no more matches.
1073    private int pos_;
1074    // Remaining length of a linear-match node, minus 1. Negative if not in such a node.
1075    private int remainingMatchLength_;
1076};
1077