1/* GENERATED SOURCE. DO NOT MODIFY. */
2// © 2016 and later: Unicode, Inc. and others.
3// License & terms of use: http://www.unicode.org/copyright.html#License
4/*
5*******************************************************************************
6*   Copyright (C) 2010-2014, International Business Machines
7*   Corporation and others.  All Rights Reserved.
8*******************************************************************************
9*   created on: 2010nov23
10*   created by: Markus W. Scherer
11*   ported from ICU4C bytestrie.h/.cpp
12*/
13package android.icu.util;
14
15import java.io.IOException;
16import java.nio.ByteBuffer;
17import java.util.ArrayList;
18import java.util.NoSuchElementException;
19
20/**
21 * Light-weight, non-const reader class for a BytesTrie.
22 * Traverses a byte-serialized data structure with minimal state,
23 * for mapping byte sequences to non-negative integer values.
24 *
25 * <p>This class is not intended for public subclassing.
26 *
27 * @author Markus W. Scherer
28 * @hide Only a subset of ICU is exposed in Android
29 */
30public final class BytesTrie implements Cloneable, Iterable<BytesTrie.Entry> {
31    /**
32     * Constructs a BytesTrie reader instance.
33     *
34     * <p>The array must contain a copy of a byte sequence from the BytesTrieBuilder,
35     * with the offset indicating the first byte of that sequence.
36     * The BytesTrie object will not read more bytes than
37     * the BytesTrieBuilder generated in the corresponding build() call.
38     *
39     * <p>The array is not copied/cloned and must not be modified while
40     * the BytesTrie object is in use.
41     *
42     * @param trieBytes Bytes array that contains the serialized trie.
43     * @param offset Root offset of the trie in the array.
44     */
45    public BytesTrie(byte[] trieBytes, int offset) {
46        bytes_=trieBytes;
47        pos_=root_=offset;
48        remainingMatchLength_=-1;
49    }
50
51    /**
52     * Clones this trie reader object and its state,
53     * but not the byte array which will be shared.
54     * @return A shallow clone of this trie.
55     */
56    @Override
57    public Object clone() throws CloneNotSupportedException {
58        return super.clone();  // A shallow copy is just what we need.
59    }
60
61    /**
62     * Resets this trie to its initial state.
63     * @return this
64     */
65    public BytesTrie reset() {
66        pos_=root_;
67        remainingMatchLength_=-1;
68        return this;
69    }
70
71    /**
72     * BytesTrie state object, for saving a trie's current state
73     * and resetting the trie back to this state later.
74     */
75    public static final class State {
76        /**
77         * Constructs an empty State.
78         */
79        public State() {}
80        private byte[] bytes;
81        private int root;
82        private int pos;
83        private int remainingMatchLength;
84    }
85
86    /**
87     * Saves the state of this trie.
88     * @param state The State object to hold the trie's state.
89     * @return this
90     * @see #resetToState
91     */
92    public BytesTrie saveState(State state) /*const*/ {
93        state.bytes=bytes_;
94        state.root=root_;
95        state.pos=pos_;
96        state.remainingMatchLength=remainingMatchLength_;
97        return this;
98    }
99
100    /**
101     * Resets this trie to the saved state.
102     * @param state The State object which holds a saved trie state.
103     * @return this
104     * @throws IllegalArgumentException if the state object contains no state,
105     *         or the state of a different trie
106     * @see #saveState
107     * @see #reset
108     */
109    public BytesTrie resetToState(State state) {
110        if(bytes_==state.bytes && bytes_!=null && root_==state.root) {
111            pos_=state.pos;
112            remainingMatchLength_=state.remainingMatchLength;
113        } else {
114            throw new IllegalArgumentException("incompatible trie state");
115        }
116        return this;
117    }
118
119    /**
120     * Return values for BytesTrie.next(), CharsTrie.next() and similar methods.
121     */
122    public enum Result {
123        /**
124         * The input unit(s) did not continue a matching string.
125         * Once current()/next() return NO_MATCH,
126         * all further calls to current()/next() will also return NO_MATCH,
127         * until the trie is reset to its original state or to a saved state.
128         */
129        NO_MATCH,
130        /**
131         * The input unit(s) continued a matching string
132         * but there is no value for the string so far.
133         * (It is a prefix of a longer string.)
134         */
135        NO_VALUE,
136        /**
137         * The input unit(s) continued a matching string
138         * and there is a value for the string so far.
139         * This value will be returned by getValue().
140         * No further input byte/unit can continue a matching string.
141         */
142        FINAL_VALUE,
143        /**
144         * The input unit(s) continued a matching string
145         * and there is a value for the string so far.
146         * This value will be returned by getValue().
147         * Another input byte/unit can continue a matching string.
148         */
149        INTERMEDIATE_VALUE;
150
151        // Note: The following methods assume the particular order
152        // of enum constants, treating the ordinal() values like bit sets.
153        // Do not reorder the enum constants!
154
155        /**
156         * Same as (result!=NO_MATCH).
157         * @return true if the input bytes/units so far are part of a matching string/byte sequence.
158         */
159        public boolean matches() { return this!=NO_MATCH; }
160
161        /**
162         * Equivalent to (result==INTERMEDIATE_VALUE || result==FINAL_VALUE).
163         * @return true if there is a value for the input bytes/units so far.
164         * @see #getValue
165         */
166        public boolean hasValue() { return ordinal()>=2; }
167
168        /**
169         * Equivalent to (result==NO_VALUE || result==INTERMEDIATE_VALUE).
170         * @return true if another input byte/unit can continue a matching string.
171         */
172        public boolean hasNext() { return (ordinal()&1)!=0; }
173    }
174
175    /**
176     * Determines whether the byte sequence so far matches, whether it has a value,
177     * and whether another input byte can continue a matching byte sequence.
178     * @return The match/value Result.
179     */
180    public Result current() /*const*/ {
181        int pos=pos_;
182        if(pos<0) {
183            return Result.NO_MATCH;
184        } else {
185            int node;
186            return (remainingMatchLength_<0 && (node=bytes_[pos]&0xff)>=kMinValueLead) ?
187                    valueResults_[node&kValueIsFinal] : Result.NO_VALUE;
188        }
189    }
190
191    /**
192     * Traverses the trie from the initial state for this input byte.
193     * Equivalent to reset().next(inByte).
194     * @param inByte Input byte value. Values -0x100..-1 are treated like 0..0xff.
195     *               Values below -0x100 and above 0xff will never match.
196     * @return The match/value Result.
197     */
198    public Result first(int inByte) {
199        remainingMatchLength_=-1;
200        if(inByte<0) {
201            inByte+=0x100;
202        }
203        return nextImpl(root_, inByte);
204    }
205
206    /**
207     * Traverses the trie from the current state for this input byte.
208     * @param inByte Input byte value. Values -0x100..-1 are treated like 0..0xff.
209     *               Values below -0x100 and above 0xff will never match.
210     * @return The match/value Result.
211     */
212    public Result next(int inByte) {
213        int pos=pos_;
214        if(pos<0) {
215            return Result.NO_MATCH;
216        }
217        if(inByte<0) {
218            inByte+=0x100;
219        }
220        int length=remainingMatchLength_;  // Actual remaining match length minus 1.
221        if(length>=0) {
222            // Remaining part of a linear-match node.
223            if(inByte==(bytes_[pos++]&0xff)) {
224                remainingMatchLength_=--length;
225                pos_=pos;
226                int node;
227                return (length<0 && (node=bytes_[pos]&0xff)>=kMinValueLead) ?
228                        valueResults_[node&kValueIsFinal] : Result.NO_VALUE;
229            } else {
230                stop();
231                return Result.NO_MATCH;
232            }
233        }
234        return nextImpl(pos, inByte);
235    }
236
237    /**
238     * Traverses the trie from the current state for this byte sequence.
239     * Equivalent to
240     * <pre>
241     * Result result=current();
242     * for(each c in s)
243     *   if(!result.hasNext()) return Result.NO_MATCH;
244     *   result=next(c);
245     * return result;
246     * </pre>
247     * @param s Contains a string or byte sequence.
248     * @param sIndex The start index of the byte sequence in s.
249     * @param sLimit The (exclusive) end index of the byte sequence in s.
250     * @return The match/value Result.
251     */
252    public Result next(byte[] s, int sIndex, int sLimit) {
253        if(sIndex>=sLimit) {
254            // Empty input.
255            return current();
256        }
257        int pos=pos_;
258        if(pos<0) {
259            return Result.NO_MATCH;
260        }
261        int length=remainingMatchLength_;  // Actual remaining match length minus 1.
262        for(;;) {
263            // Fetch the next input byte, if there is one.
264            // Continue a linear-match node.
265            byte inByte;
266            for(;;) {
267                if(sIndex==sLimit) {
268                    remainingMatchLength_=length;
269                    pos_=pos;
270                    int node;
271                    return (length<0 && (node=(bytes_[pos]&0xff))>=kMinValueLead) ?
272                            valueResults_[node&kValueIsFinal] : Result.NO_VALUE;
273                }
274                inByte=s[sIndex++];
275                if(length<0) {
276                    remainingMatchLength_=length;
277                    break;
278                }
279                if(inByte!=bytes_[pos]) {
280                    stop();
281                    return Result.NO_MATCH;
282                }
283                ++pos;
284                --length;
285            }
286            for(;;) {
287                int node=bytes_[pos++]&0xff;
288                if(node<kMinLinearMatch) {
289                    Result result=branchNext(pos, node, inByte&0xff);
290                    if(result==Result.NO_MATCH) {
291                        return Result.NO_MATCH;
292                    }
293                    // Fetch the next input byte, if there is one.
294                    if(sIndex==sLimit) {
295                        return result;
296                    }
297                    if(result==Result.FINAL_VALUE) {
298                        // No further matching bytes.
299                        stop();
300                        return Result.NO_MATCH;
301                    }
302                    inByte=s[sIndex++];
303                    pos=pos_;  // branchNext() advanced pos and wrote it to pos_ .
304                } else if(node<kMinValueLead) {
305                    // Match length+1 bytes.
306                    length=node-kMinLinearMatch;  // Actual match length minus 1.
307                    if(inByte!=bytes_[pos]) {
308                        stop();
309                        return Result.NO_MATCH;
310                    }
311                    ++pos;
312                    --length;
313                    break;
314                } else if((node&kValueIsFinal)!=0) {
315                    // No further matching bytes.
316                    stop();
317                    return Result.NO_MATCH;
318                } else {
319                    // Skip intermediate value.
320                    pos=skipValue(pos, node);
321                    // The next node must not also be a value node.
322                    assert((bytes_[pos]&0xff)<kMinValueLead);
323                }
324            }
325        }
326    }
327
328    /**
329     * Returns a matching byte sequence's value if called immediately after
330     * current()/first()/next() returned Result.INTERMEDIATE_VALUE or Result.FINAL_VALUE.
331     * getValue() can be called multiple times.
332     *
333     * Do not call getValue() after Result.NO_MATCH or Result.NO_VALUE!
334     * @return The value for the byte sequence so far.
335     */
336    public int getValue() /*const*/ {
337        int pos=pos_;
338        int leadByte=bytes_[pos++]&0xff;
339        assert(leadByte>=kMinValueLead);
340        return readValue(bytes_, pos, leadByte>>1);
341    }
342
343    /**
344     * Determines whether all byte sequences reachable from the current state
345     * map to the same value, and if so, returns that value.
346     * @return The unique value in bits 32..1 with bit 0 set,
347     *         if all byte sequences reachable from the current state
348     *         map to the same value; otherwise returns 0.
349     */
350    public long getUniqueValue() /*const*/ {
351        int pos=pos_;
352        if(pos<0) {
353            return 0;
354        }
355        // Skip the rest of a pending linear-match node.
356        long uniqueValue=findUniqueValue(bytes_, pos+remainingMatchLength_+1, 0);
357        // Ignore internally used bits 63..33; extend the actual value's sign bit from bit 32.
358        return (uniqueValue<<31)>>31;
359    }
360
361    /**
362     * Finds each byte which continues the byte sequence from the current state.
363     * That is, each byte b for which it would be next(b)!=Result.NO_MATCH now.
364     * @param out Each next byte is 0-extended to a char and appended to this object.
365     *            (Only uses the out.append(c) method.)
366     * @return The number of bytes which continue the byte sequence from here.
367     */
368    public int getNextBytes(Appendable out) /*const*/ {
369        int pos=pos_;
370        if(pos<0) {
371            return 0;
372        }
373        if(remainingMatchLength_>=0) {
374            append(out, bytes_[pos]&0xff);  // Next byte of a pending linear-match node.
375            return 1;
376        }
377        int node=bytes_[pos++]&0xff;
378        if(node>=kMinValueLead) {
379            if((node&kValueIsFinal)!=0) {
380                return 0;
381            } else {
382                pos=skipValue(pos, node);
383                node=bytes_[pos++]&0xff;
384                assert(node<kMinValueLead);
385            }
386        }
387        if(node<kMinLinearMatch) {
388            if(node==0) {
389                node=bytes_[pos++]&0xff;
390            }
391            getNextBranchBytes(bytes_, pos, ++node, out);
392            return node;
393        } else {
394            // First byte of the linear-match node.
395            append(out, bytes_[pos]&0xff);
396            return 1;
397        }
398    }
399
400    /**
401     * Iterates from the current state of this trie.
402     * @return A new BytesTrie.Iterator.
403     */
404    @Override
405    public Iterator iterator() {
406        return new Iterator(bytes_, pos_, remainingMatchLength_, 0);
407    }
408
409    /**
410     * Iterates from the current state of this trie.
411     * @param maxStringLength If 0, the iterator returns full strings/byte sequences.
412     *                        Otherwise, the iterator returns strings with this maximum length.
413     * @return A new BytesTrie.Iterator.
414     */
415    public Iterator iterator(int maxStringLength) {
416        return new Iterator(bytes_, pos_, remainingMatchLength_, maxStringLength);
417    }
418
419    /**
420     * Iterates from the root of a byte-serialized BytesTrie.
421     * @param trieBytes Bytes array that contains the serialized trie.
422     * @param offset Root offset of the trie in the array.
423     * @param maxStringLength If 0, the iterator returns full strings/byte sequences.
424     *                        Otherwise, the iterator returns strings with this maximum length.
425     * @return A new BytesTrie.Iterator.
426     */
427    public static Iterator iterator(byte[] trieBytes, int offset, int maxStringLength) {
428        return new Iterator(trieBytes, offset, -1, maxStringLength);
429    }
430
431    /**
432     * Return value type for the Iterator.
433     */
434    public static final class Entry {
435        private Entry(int capacity) {
436            bytes=new byte[capacity];
437        }
438
439        /**
440         * @return The length of the byte sequence.
441         */
442        public int bytesLength() { return length; }
443        /**
444         * Returns a byte of the byte sequence.
445         * @param index An index into the byte sequence.
446         * @return The index-th byte sequence byte.
447         */
448        public byte byteAt(int index) { return bytes[index]; }
449        /**
450         * Copies the byte sequence into a byte array.
451         * @param dest Destination byte array.
452         * @param destOffset Starting offset to where in dest the byte sequence is copied.
453         */
454        public void copyBytesTo(byte[] dest, int destOffset) {
455            System.arraycopy(bytes, 0, dest, destOffset, length);
456        }
457        /**
458         * @return The byte sequence as a read-only ByteBuffer.
459         */
460        public ByteBuffer bytesAsByteBuffer() {
461            return ByteBuffer.wrap(bytes, 0, length).asReadOnlyBuffer();
462        }
463
464        /**
465         * The value associated with the byte sequence.
466         */
467        public int value;
468
469        private void ensureCapacity(int len) {
470            if(bytes.length<len) {
471                byte[] newBytes=new byte[Math.min(2*bytes.length, 2*len)];
472                System.arraycopy(bytes, 0, newBytes, 0, length);
473                bytes=newBytes;
474            }
475        }
476        private void append(byte b) {
477            ensureCapacity(length+1);
478            bytes[length++]=b;
479        }
480        private void append(byte[] b, int off, int len) {
481            ensureCapacity(length+len);
482            System.arraycopy(b, off, bytes, length, len);
483            length+=len;
484        }
485        private void truncateString(int newLength) { length=newLength; }
486
487        private byte[] bytes;
488        private int length;
489    }
490
491    /**
492     * Iterator for all of the (byte sequence, value) pairs in a BytesTrie.
493     */
494    public static final class Iterator implements java.util.Iterator<Entry> {
495        private Iterator(byte[] trieBytes, int offset, int remainingMatchLength, int maxStringLength) {
496            bytes_=trieBytes;
497            pos_=initialPos_=offset;
498            remainingMatchLength_=initialRemainingMatchLength_=remainingMatchLength;
499            maxLength_=maxStringLength;
500            entry_=new Entry(maxLength_!=0 ? maxLength_ : 32);
501            int length=remainingMatchLength_;  // Actual remaining match length minus 1.
502            if(length>=0) {
503                // Pending linear-match node, append remaining bytes to entry_.
504                ++length;
505                if(maxLength_>0 && length>maxLength_) {
506                    length=maxLength_;  // This will leave remainingMatchLength>=0 as a signal.
507                }
508                entry_.append(bytes_, pos_, length);
509                pos_+=length;
510                remainingMatchLength_-=length;
511            }
512        }
513
514        /**
515         * Resets this iterator to its initial state.
516         * @return this
517         */
518        public Iterator reset() {
519            pos_=initialPos_;
520            remainingMatchLength_=initialRemainingMatchLength_;
521            int length=remainingMatchLength_+1;  // Remaining match length.
522            if(maxLength_>0 && length>maxLength_) {
523                length=maxLength_;
524            }
525            entry_.truncateString(length);
526            pos_+=length;
527            remainingMatchLength_-=length;
528            stack_.clear();
529            return this;
530        }
531
532        /**
533         * @return true if there are more elements.
534         */
535        @Override
536        public boolean hasNext() /*const*/ { return pos_>=0 || !stack_.isEmpty(); }
537
538        /**
539         * Finds the next (byte sequence, value) pair if there is one.
540         *
541         * If the byte sequence is truncated to the maximum length and does not
542         * have a real value, then the value is set to -1.
543         * In this case, this "not a real value" is indistinguishable from
544         * a real value of -1.
545         * @return An Entry with the string and value of the next element.
546         * @throws NoSuchElementException - iteration has no more elements.
547         */
548        @Override
549        public Entry next() {
550            int pos=pos_;
551            if(pos<0) {
552                if(stack_.isEmpty()) {
553                    throw new NoSuchElementException();
554                }
555                // Pop the state off the stack and continue with the next outbound edge of
556                // the branch node.
557                long top=stack_.remove(stack_.size()-1);
558                int length=(int)top;
559                pos=(int)(top>>32);
560                entry_.truncateString(length&0xffff);
561                length>>>=16;
562                if(length>1) {
563                    pos=branchNext(pos, length);
564                    if(pos<0) {
565                        return entry_;  // Reached a final value.
566                    }
567                } else {
568                    entry_.append(bytes_[pos++]);
569                }
570            }
571            if(remainingMatchLength_>=0) {
572                // We only get here if we started in a pending linear-match node
573                // with more than maxLength remaining bytes.
574                return truncateAndStop();
575            }
576            for(;;) {
577                int node=bytes_[pos++]&0xff;
578                if(node>=kMinValueLead) {
579                    // Deliver value for the byte sequence so far.
580                    boolean isFinal=(node&kValueIsFinal)!=0;
581                    entry_.value=readValue(bytes_, pos, node>>1);
582                    if(isFinal || (maxLength_>0 && entry_.length==maxLength_)) {
583                        pos_=-1;
584                    } else {
585                        pos_=skipValue(pos, node);
586                    }
587                    return entry_;
588                }
589                if(maxLength_>0 && entry_.length==maxLength_) {
590                    return truncateAndStop();
591                }
592                if(node<kMinLinearMatch) {
593                    if(node==0) {
594                        node=bytes_[pos++]&0xff;
595                    }
596                    pos=branchNext(pos, node+1);
597                    if(pos<0) {
598                        return entry_;  // Reached a final value.
599                    }
600                } else {
601                    // Linear-match node, append length bytes to entry_.
602                    int length=node-kMinLinearMatch+1;
603                    if(maxLength_>0 && entry_.length+length>maxLength_) {
604                        entry_.append(bytes_, pos, maxLength_-entry_.length);
605                        return truncateAndStop();
606                    }
607                    entry_.append(bytes_, pos, length);
608                    pos+=length;
609                }
610            }
611        }
612
613        /**
614         * Iterator.remove() is not supported.
615         * @throws UnsupportedOperationException (always)
616         */
617        @Override
618        public void remove() {
619            throw new UnsupportedOperationException();
620        }
621
622        private Entry truncateAndStop() {
623            pos_=-1;
624            entry_.value=-1;  // no real value for str
625            return entry_;
626        }
627
628        private int branchNext(int pos, int length) {
629            while(length>kMaxBranchLinearSubNodeLength) {
630                ++pos;  // ignore the comparison byte
631                // Push state for the greater-or-equal edge.
632                stack_.add(((long)skipDelta(bytes_, pos)<<32)|((length-(length>>1))<<16)|entry_.length);
633                // Follow the less-than edge.
634                length>>=1;
635                pos=jumpByDelta(bytes_, pos);
636            }
637            // List of key-value pairs where values are either final values or jump deltas.
638            // Read the first (key, value) pair.
639            byte trieByte=bytes_[pos++];
640            int node=bytes_[pos++]&0xff;
641            boolean isFinal=(node&kValueIsFinal)!=0;
642            int value=readValue(bytes_, pos, node>>1);
643            pos=skipValue(pos, node);
644            stack_.add(((long)pos<<32)|((length-1)<<16)|entry_.length);
645            entry_.append(trieByte);
646            if(isFinal) {
647                pos_=-1;
648                entry_.value=value;
649                return -1;
650            } else {
651                return pos+value;
652            }
653        }
654
655        private byte[] bytes_;
656        private int pos_;
657        private int initialPos_;
658        private int remainingMatchLength_;
659        private int initialRemainingMatchLength_;
660
661        private int maxLength_;
662        private Entry entry_;
663
664        // The stack stores longs for backtracking to another
665        // outbound edge of a branch node.
666        // Each long has the offset from bytes_ in bits 62..32,
667        // the entry_.stringLength() from before the node in bits 15..0,
668        // and the remaining branch length in bits 24..16. (Bits 31..25 are unused.)
669        // (We could store the remaining branch length minus 1 in bits 23..16 and not use bits 31..24,
670        // but the code looks more confusing that way.)
671        private ArrayList<Long> stack_=new ArrayList<Long>();
672    }
673
674    private void stop() {
675        pos_=-1;
676    }
677
678    // Reads a compact 32-bit integer.
679    // pos is already after the leadByte, and the lead byte is already shifted right by 1.
680    private static int readValue(byte[] bytes, int pos, int leadByte) {
681        int value;
682        if(leadByte<kMinTwoByteValueLead) {
683            value=leadByte-kMinOneByteValueLead;
684        } else if(leadByte<kMinThreeByteValueLead) {
685            value=((leadByte-kMinTwoByteValueLead)<<8)|(bytes[pos]&0xff);
686        } else if(leadByte<kFourByteValueLead) {
687            value=((leadByte-kMinThreeByteValueLead)<<16)|((bytes[pos]&0xff)<<8)|(bytes[pos+1]&0xff);
688        } else if(leadByte==kFourByteValueLead) {
689            value=((bytes[pos]&0xff)<<16)|((bytes[pos+1]&0xff)<<8)|(bytes[pos+2]&0xff);
690        } else {
691            value=(bytes[pos]<<24)|((bytes[pos+1]&0xff)<<16)|((bytes[pos+2]&0xff)<<8)|(bytes[pos+3]&0xff);
692        }
693        return value;
694    }
695    private static int skipValue(int pos, int leadByte) {
696        assert(leadByte>=kMinValueLead);
697        if(leadByte>=(kMinTwoByteValueLead<<1)) {
698            if(leadByte<(kMinThreeByteValueLead<<1)) {
699                ++pos;
700            } else if(leadByte<(kFourByteValueLead<<1)) {
701                pos+=2;
702            } else {
703                pos+=3+((leadByte>>1)&1);
704            }
705        }
706        return pos;
707    }
708    private static int skipValue(byte[] bytes, int pos) {
709        int leadByte=bytes[pos++]&0xff;
710        return skipValue(pos, leadByte);
711    }
712
713    // Reads a jump delta and jumps.
714    private static int jumpByDelta(byte[] bytes, int pos) {
715        int delta=bytes[pos++]&0xff;
716        if(delta<kMinTwoByteDeltaLead) {
717            // nothing to do
718        } else if(delta<kMinThreeByteDeltaLead) {
719            delta=((delta-kMinTwoByteDeltaLead)<<8)|(bytes[pos++]&0xff);
720        } else if(delta<kFourByteDeltaLead) {
721            delta=((delta-kMinThreeByteDeltaLead)<<16)|((bytes[pos]&0xff)<<8)|(bytes[pos+1]&0xff);
722            pos+=2;
723        } else if(delta==kFourByteDeltaLead) {
724            delta=((bytes[pos]&0xff)<<16)|((bytes[pos+1]&0xff)<<8)|(bytes[pos+2]&0xff);
725            pos+=3;
726        } else {
727            delta=(bytes[pos]<<24)|((bytes[pos+1]&0xff)<<16)|((bytes[pos+2]&0xff)<<8)|(bytes[pos+3]&0xff);
728            pos+=4;
729        }
730        return pos+delta;
731    }
732
733    private static int skipDelta(byte[] bytes, int pos) {
734        int delta=bytes[pos++]&0xff;
735        if(delta>=kMinTwoByteDeltaLead) {
736            if(delta<kMinThreeByteDeltaLead) {
737                ++pos;
738            } else if(delta<kFourByteDeltaLead) {
739                pos+=2;
740            } else {
741                pos+=3+(delta&1);
742            }
743        }
744        return pos;
745    }
746
747    private static Result[] valueResults_={ Result.INTERMEDIATE_VALUE, Result.FINAL_VALUE };
748
749    // Handles a branch node for both next(byte) and next(string).
750    private Result branchNext(int pos, int length, int inByte) {
751        // Branch according to the current byte.
752        if(length==0) {
753            length=bytes_[pos++]&0xff;
754        }
755        ++length;
756        // The length of the branch is the number of bytes to select from.
757        // The data structure encodes a binary search.
758        while(length>kMaxBranchLinearSubNodeLength) {
759            if(inByte<(bytes_[pos++]&0xff)) {
760                length>>=1;
761                pos=jumpByDelta(bytes_, pos);
762            } else {
763                length=length-(length>>1);
764                pos=skipDelta(bytes_, pos);
765            }
766        }
767        // Drop down to linear search for the last few bytes.
768        // length>=2 because the loop body above sees length>kMaxBranchLinearSubNodeLength>=3
769        // and divides length by 2.
770        do {
771            if(inByte==(bytes_[pos++]&0xff)) {
772                Result result;
773                int node=bytes_[pos]&0xff;
774                assert(node>=kMinValueLead);
775                if((node&kValueIsFinal)!=0) {
776                    // Leave the final value for getValue() to read.
777                    result=Result.FINAL_VALUE;
778                } else {
779                    // Use the non-final value as the jump delta.
780                    ++pos;
781                    // int delta=readValue(pos, node>>1);
782                    node>>=1;
783                    int delta;
784                    if(node<kMinTwoByteValueLead) {
785                        delta=node-kMinOneByteValueLead;
786                    } else if(node<kMinThreeByteValueLead) {
787                        delta=((node-kMinTwoByteValueLead)<<8)|(bytes_[pos++]&0xff);
788                    } else if(node<kFourByteValueLead) {
789                        delta=((node-kMinThreeByteValueLead)<<16)|((bytes_[pos]&0xff)<<8)|(bytes_[pos+1]&0xff);
790                        pos+=2;
791                    } else if(node==kFourByteValueLead) {
792                        delta=((bytes_[pos]&0xff)<<16)|((bytes_[pos+1]&0xff)<<8)|(bytes_[pos+2]&0xff);
793                        pos+=3;
794                    } else {
795                        delta=(bytes_[pos]<<24)|((bytes_[pos+1]&0xff)<<16)|((bytes_[pos+2]&0xff)<<8)|(bytes_[pos+3]&0xff);
796                        pos+=4;
797                    }
798                    // end readValue()
799                    pos+=delta;
800                    node=bytes_[pos]&0xff;
801                    result= node>=kMinValueLead ? valueResults_[node&kValueIsFinal] : Result.NO_VALUE;
802                }
803                pos_=pos;
804                return result;
805            }
806            --length;
807            pos=skipValue(bytes_, pos);
808        } while(length>1);
809        if(inByte==(bytes_[pos++]&0xff)) {
810            pos_=pos;
811            int node=bytes_[pos]&0xff;
812            return node>=kMinValueLead ? valueResults_[node&kValueIsFinal] : Result.NO_VALUE;
813        } else {
814            stop();
815            return Result.NO_MATCH;
816        }
817    }
818
819    // Requires remainingLength_<0.
820    private Result nextImpl(int pos, int inByte) {
821        for(;;) {
822            int node=bytes_[pos++]&0xff;
823            if(node<kMinLinearMatch) {
824                return branchNext(pos, node, inByte);
825            } else if(node<kMinValueLead) {
826                // Match the first of length+1 bytes.
827                int length=node-kMinLinearMatch;  // Actual match length minus 1.
828                if(inByte==(bytes_[pos++]&0xff)) {
829                    remainingMatchLength_=--length;
830                    pos_=pos;
831                    return (length<0 && (node=bytes_[pos]&0xff)>=kMinValueLead) ?
832                            valueResults_[node&kValueIsFinal] : Result.NO_VALUE;
833                } else {
834                    // No match.
835                    break;
836                }
837            } else if((node&kValueIsFinal)!=0) {
838                // No further matching bytes.
839                break;
840            } else {
841                // Skip intermediate value.
842                pos=skipValue(pos, node);
843                // The next node must not also be a value node.
844                assert((bytes_[pos]&0xff)<kMinValueLead);
845            }
846        }
847        stop();
848        return Result.NO_MATCH;
849    }
850
851    // Helper functions for getUniqueValue().
852    // Recursively finds a unique value (or whether there is not a unique one)
853    // from a branch.
854    // uniqueValue: On input, same as for getUniqueValue()/findUniqueValue().
855    // On return, if not 0, then bits 63..33 contain the updated non-negative pos.
856    private static long findUniqueValueFromBranch(byte[] bytes, int pos, int length,
857                                                  long uniqueValue) {
858        while(length>kMaxBranchLinearSubNodeLength) {
859            ++pos;  // ignore the comparison byte
860            uniqueValue=findUniqueValueFromBranch(bytes, jumpByDelta(bytes, pos), length>>1, uniqueValue);
861            if(uniqueValue==0) {
862                return 0;
863            }
864            length=length-(length>>1);
865            pos=skipDelta(bytes, pos);
866        }
867        do {
868            ++pos;  // ignore a comparison byte
869            // handle its value
870            int node=bytes[pos++]&0xff;
871            boolean isFinal=(node&kValueIsFinal)!=0;
872            int value=readValue(bytes, pos, node>>1);
873            pos=skipValue(pos, node);
874            if(isFinal) {
875                if(uniqueValue!=0) {
876                    if(value!=(int)(uniqueValue>>1)) {
877                        return 0;
878                    }
879                } else {
880                    uniqueValue=((long)value<<1)|1;
881                }
882            } else {
883                uniqueValue=findUniqueValue(bytes, pos+value, uniqueValue);
884                if(uniqueValue==0) {
885                    return 0;
886                }
887            }
888        } while(--length>1);
889        // ignore the last comparison byte
890        return ((long)(pos+1)<<33)|(uniqueValue&0x1ffffffffL);
891    }
892    // Recursively finds a unique value (or whether there is not a unique one)
893    // starting from a position on a node lead byte.
894    // uniqueValue: If there is one, then bits 32..1 contain the value and bit 0 is set.
895    // Otherwise, uniqueValue is 0. Bits 63..33 are ignored.
896    private static long findUniqueValue(byte[] bytes, int pos, long uniqueValue) {
897        for(;;) {
898            int node=bytes[pos++]&0xff;
899            if(node<kMinLinearMatch) {
900                if(node==0) {
901                    node=bytes[pos++]&0xff;
902                }
903                uniqueValue=findUniqueValueFromBranch(bytes, pos, node+1, uniqueValue);
904                if(uniqueValue==0) {
905                    return 0;
906                }
907                pos=(int)(uniqueValue>>>33);
908            } else if(node<kMinValueLead) {
909                // linear-match node
910                pos+=node-kMinLinearMatch+1;  // Ignore the match bytes.
911            } else {
912                boolean isFinal=(node&kValueIsFinal)!=0;
913                int value=readValue(bytes, pos, node>>1);
914                if(uniqueValue!=0) {
915                    if(value!=(int)(uniqueValue>>1)) {
916                        return 0;
917                    }
918                } else {
919                    uniqueValue=((long)value<<1)|1;
920                }
921                if(isFinal) {
922                    return uniqueValue;
923                }
924                pos=skipValue(pos, node);
925            }
926        }
927    }
928
929    // Helper functions for getNextBytes().
930    // getNextBytes() when pos is on a branch node.
931    private static void getNextBranchBytes(byte[] bytes, int pos, int length, Appendable out) {
932        while(length>kMaxBranchLinearSubNodeLength) {
933            ++pos;  // ignore the comparison byte
934            getNextBranchBytes(bytes, jumpByDelta(bytes, pos), length>>1, out);
935            length=length-(length>>1);
936            pos=skipDelta(bytes, pos);
937        }
938        do {
939            append(out, bytes[pos++]&0xff);
940            pos=skipValue(bytes, pos);
941        } while(--length>1);
942        append(out, bytes[pos]&0xff);
943    }
944    private static void append(Appendable out, int c) {
945        try {
946            out.append((char)c);
947        } catch(IOException e) {
948            throw new ICUUncheckedIOException(e);
949        }
950    }
951
952    // BytesTrie data structure
953    //
954    // The trie consists of a series of byte-serialized nodes for incremental
955    // string/byte sequence matching. The root node is at the beginning of the trie data.
956    //
957    // Types of nodes are distinguished by their node lead byte ranges.
958    // After each node, except a final-value node, another node follows to
959    // encode match values or continue matching further bytes.
960    //
961    // Node types:
962    //  - Value node: Stores a 32-bit integer in a compact, variable-length format.
963    //    The value is for the string/byte sequence so far.
964    //    One node bit indicates whether the value is final or whether
965    //    matching continues with the next node.
966    //  - Linear-match node: Matches a number of bytes.
967    //  - Branch node: Branches to other nodes according to the current input byte.
968    //    The node byte is the length of the branch (number of bytes to select from)
969    //    minus 1. It is followed by a sub-node:
970    //    - If the length is at most kMaxBranchLinearSubNodeLength, then
971    //      there are length-1 (key, value) pairs and then one more comparison byte.
972    //      If one of the key bytes matches, then the value is either a final value for
973    //      the string/byte sequence so far, or a "jump" delta to the next node.
974    //      If the last byte matches, then matching continues with the next node.
975    //      (Values have the same encoding as value nodes.)
976    //    - If the length is greater than kMaxBranchLinearSubNodeLength, then
977    //      there is one byte and one "jump" delta.
978    //      If the input byte is less than the sub-node byte, then "jump" by delta to
979    //      the next sub-node which will have a length of length/2.
980    //      (The delta has its own compact encoding.)
981    //      Otherwise, skip the "jump" delta to the next sub-node
982    //      which will have a length of length-length/2.
983
984    // Node lead byte values.
985
986    // 00..0f: Branch node. If node!=0 then the length is node+1, otherwise
987    // the length is one more than the next byte.
988
989    // For a branch sub-node with at most this many entries, we drop down
990    // to a linear search.
991    /*package*/ static final int kMaxBranchLinearSubNodeLength=5;
992
993    // 10..1f: Linear-match node, match 1..16 bytes and continue reading the next node.
994    /*package*/ static final int kMinLinearMatch=0x10;
995    /*package*/ static final int kMaxLinearMatchLength=0x10;
996
997    // 20..ff: Variable-length value node.
998    // If odd, the value is final. (Otherwise, intermediate value or jump delta.)
999    // Then shift-right by 1 bit.
1000    // The remaining lead byte value indicates the number of following bytes (0..4)
1001    // and contains the value's top bits.
1002    /*package*/ static final int kMinValueLead=kMinLinearMatch+kMaxLinearMatchLength;  // 0x20
1003    // It is a final value if bit 0 is set.
1004    private static final int kValueIsFinal=1;
1005
1006    // Compact value: After testing bit 0, shift right by 1 and then use the following thresholds.
1007    /*package*/ static final int kMinOneByteValueLead=kMinValueLead/2;  // 0x10
1008    /*package*/ static final int kMaxOneByteValue=0x40;  // At least 6 bits in the first byte.
1009
1010    /*package*/ static final int kMinTwoByteValueLead=kMinOneByteValueLead+kMaxOneByteValue+1;  // 0x51
1011    /*package*/ static final int kMaxTwoByteValue=0x1aff;
1012
1013    /*package*/ static final int kMinThreeByteValueLead=kMinTwoByteValueLead+(kMaxTwoByteValue>>8)+1;  // 0x6c
1014    /*package*/ static final int kFourByteValueLead=0x7e;
1015
1016    // A little more than Unicode code points. (0x11ffff)
1017    /*package*/ static final int kMaxThreeByteValue=((kFourByteValueLead-kMinThreeByteValueLead)<<16)-1;
1018
1019    /*package*/ static final int kFiveByteValueLead=0x7f;
1020
1021    // Compact delta integers.
1022    /*package*/ static final int kMaxOneByteDelta=0xbf;
1023    /*package*/ static final int kMinTwoByteDeltaLead=kMaxOneByteDelta+1;  // 0xc0
1024    /*package*/ static final int kMinThreeByteDeltaLead=0xf0;
1025    /*package*/ static final int kFourByteDeltaLead=0xfe;
1026    /*package*/ static final int kFiveByteDeltaLead=0xff;
1027
1028    /*package*/ static final int kMaxTwoByteDelta=((kMinThreeByteDeltaLead-kMinTwoByteDeltaLead)<<8)-1;  // 0x2fff
1029    /*package*/ static final int kMaxThreeByteDelta=((kFourByteDeltaLead-kMinThreeByteDeltaLead)<<16)-1;  // 0xdffff
1030
1031    // Fixed value referencing the BytesTrie bytes.
1032    private byte[] bytes_;
1033    private int root_;
1034
1035    // Iterator variables.
1036
1037    // Index of next trie byte to read. Negative if no more matches.
1038    private int pos_;
1039    // Remaining length of a linear-match node, minus 1. Negative if not in such a node.
1040    private int remainingMatchLength_;
1041};
1042