BytesTrie.java revision 836e6b40a94ec3fb7545a76cb072960442b7eee9
1/* GENERATED SOURCE. DO NOT MODIFY. */
2/*
3*******************************************************************************
4*   Copyright (C) 2010-2014, International Business Machines
5*   Corporation and others.  All Rights Reserved.
6*******************************************************************************
7*   created on: 2010nov23
8*   created by: Markus W. Scherer
9*   ported from ICU4C bytestrie.h/.cpp
10*/
11package android.icu.util;
12
13import java.io.IOException;
14import java.nio.ByteBuffer;
15import java.util.ArrayList;
16import java.util.NoSuchElementException;
17
18/**
19 * Light-weight, non-const reader class for a BytesTrie.
20 * Traverses a byte-serialized data structure with minimal state,
21 * for mapping byte sequences to non-negative integer values.
22 *
23 * <p>This class is not intended for public subclassing.
24 *
25 * @stable ICU 4.8
26 * @author Markus W. Scherer
27 * @hide Only a subset of ICU is exposed in Android
28 * @hide All android.icu classes are currently hidden
29 */
30public final class BytesTrie implements Cloneable, Iterable<BytesTrie.Entry> {
31    /**
32     * Constructs a BytesTrie reader instance.
33     *
34     * <p>The array must contain a copy of a byte sequence from the BytesTrieBuilder,
35     * with the offset indicating the first byte of that sequence.
36     * The BytesTrie object will not read more bytes than
37     * the BytesTrieBuilder generated in the corresponding build() call.
38     *
39     * <p>The array is not copied/cloned and must not be modified while
40     * the BytesTrie object is in use.
41     *
42     * @param trieBytes Bytes array that contains the serialized trie.
43     * @param offset Root offset of the trie in the array.
44     * @stable ICU 4.8
45     */
46    public BytesTrie(byte[] trieBytes, int offset) {
47        bytes_=trieBytes;
48        pos_=root_=offset;
49        remainingMatchLength_=-1;
50    }
51
52    /**
53     * Clones this trie reader object and its state,
54     * but not the byte array which will be shared.
55     * @return A shallow clone of this trie.
56     * @stable ICU 4.8
57     */
58    @Override
59    public Object clone() throws CloneNotSupportedException {
60        return super.clone();  // A shallow copy is just what we need.
61    }
62
63    /**
64     * Resets this trie to its initial state.
65     * @return this
66     * @stable ICU 4.8
67     */
68    public BytesTrie reset() {
69        pos_=root_;
70        remainingMatchLength_=-1;
71        return this;
72    }
73
74    /**
75     * BytesTrie state object, for saving a trie's current state
76     * and resetting the trie back to this state later.
77     * @stable ICU 4.8
78     */
79    public static final class State {
80        /**
81         * Constructs an empty State.
82         * @stable ICU 4.8
83         */
84        public State() {}
85        private byte[] bytes;
86        private int root;
87        private int pos;
88        private int remainingMatchLength;
89    }
90
91    /**
92     * Saves the state of this trie.
93     * @param state The State object to hold the trie's state.
94     * @return this
95     * @see #resetToState
96     * @stable ICU 4.8
97     */
98    public BytesTrie saveState(State state) /*const*/ {
99        state.bytes=bytes_;
100        state.root=root_;
101        state.pos=pos_;
102        state.remainingMatchLength=remainingMatchLength_;
103        return this;
104    }
105
106    /**
107     * Resets this trie to the saved state.
108     * @param state The State object which holds a saved trie state.
109     * @return this
110     * @throws IllegalArgumentException if the state object contains no state,
111     *         or the state of a different trie
112     * @see #saveState
113     * @see #reset
114     * @stable ICU 4.8
115     */
116    public BytesTrie resetToState(State state) {
117        if(bytes_==state.bytes && bytes_!=null && root_==state.root) {
118            pos_=state.pos;
119            remainingMatchLength_=state.remainingMatchLength;
120        } else {
121            throw new IllegalArgumentException("incompatible trie state");
122        }
123        return this;
124    }
125
126    /**
127     * Return values for BytesTrie.next(), CharsTrie.next() and similar methods.
128     * @stable ICU 4.8
129     */
130    public enum Result {
131        /**
132         * The input unit(s) did not continue a matching string.
133         * Once current()/next() return NO_MATCH,
134         * all further calls to current()/next() will also return NO_MATCH,
135         * until the trie is reset to its original state or to a saved state.
136         * @stable ICU 4.8
137         */
138        NO_MATCH,
139        /**
140         * The input unit(s) continued a matching string
141         * but there is no value for the string so far.
142         * (It is a prefix of a longer string.)
143         * @stable ICU 4.8
144         */
145        NO_VALUE,
146        /**
147         * The input unit(s) continued a matching string
148         * and there is a value for the string so far.
149         * This value will be returned by getValue().
150         * No further input byte/unit can continue a matching string.
151         * @stable ICU 4.8
152         */
153        FINAL_VALUE,
154        /**
155         * The input unit(s) continued a matching string
156         * and there is a value for the string so far.
157         * This value will be returned by getValue().
158         * Another input byte/unit can continue a matching string.
159         * @stable ICU 4.8
160         */
161        INTERMEDIATE_VALUE;
162
163        // Note: The following methods assume the particular order
164        // of enum constants, treating the ordinal() values like bit sets.
165        // Do not reorder the enum constants!
166
167        /**
168         * Same as (result!=NO_MATCH).
169         * @return true if the input bytes/units so far are part of a matching string/byte sequence.
170         * @stable ICU 4.8
171         */
172        public boolean matches() { return this!=NO_MATCH; }
173
174        /**
175         * Equivalent to (result==INTERMEDIATE_VALUE || result==FINAL_VALUE).
176         * @return true if there is a value for the input bytes/units so far.
177         * @see #getValue
178         * @stable ICU 4.8
179         */
180        public boolean hasValue() { return ordinal()>=2; }
181
182        /**
183         * Equivalent to (result==NO_VALUE || result==INTERMEDIATE_VALUE).
184         * @return true if another input byte/unit can continue a matching string.
185         * @stable ICU 4.8
186         */
187        public boolean hasNext() { return (ordinal()&1)!=0; }
188    }
189
190    /**
191     * Determines whether the byte sequence so far matches, whether it has a value,
192     * and whether another input byte can continue a matching byte sequence.
193     * @return The match/value Result.
194     * @stable ICU 4.8
195     */
196    public Result current() /*const*/ {
197        int pos=pos_;
198        if(pos<0) {
199            return Result.NO_MATCH;
200        } else {
201            int node;
202            return (remainingMatchLength_<0 && (node=bytes_[pos]&0xff)>=kMinValueLead) ?
203                    valueResults_[node&kValueIsFinal] : Result.NO_VALUE;
204        }
205    }
206
207    /**
208     * Traverses the trie from the initial state for this input byte.
209     * Equivalent to reset().next(inByte).
210     * @param inByte Input byte value. Values -0x100..-1 are treated like 0..0xff.
211     *               Values below -0x100 and above 0xff will never match.
212     * @return The match/value Result.
213     * @stable ICU 4.8
214     */
215    public Result first(int inByte) {
216        remainingMatchLength_=-1;
217        if(inByte<0) {
218            inByte+=0x100;
219        }
220        return nextImpl(root_, inByte);
221    }
222
223    /**
224     * Traverses the trie from the current state for this input byte.
225     * @param inByte Input byte value. Values -0x100..-1 are treated like 0..0xff.
226     *               Values below -0x100 and above 0xff will never match.
227     * @return The match/value Result.
228     * @stable ICU 4.8
229     */
230    public Result next(int inByte) {
231        int pos=pos_;
232        if(pos<0) {
233            return Result.NO_MATCH;
234        }
235        if(inByte<0) {
236            inByte+=0x100;
237        }
238        int length=remainingMatchLength_;  // Actual remaining match length minus 1.
239        if(length>=0) {
240            // Remaining part of a linear-match node.
241            if(inByte==(bytes_[pos++]&0xff)) {
242                remainingMatchLength_=--length;
243                pos_=pos;
244                int node;
245                return (length<0 && (node=bytes_[pos]&0xff)>=kMinValueLead) ?
246                        valueResults_[node&kValueIsFinal] : Result.NO_VALUE;
247            } else {
248                stop();
249                return Result.NO_MATCH;
250            }
251        }
252        return nextImpl(pos, inByte);
253    }
254
255    /**
256     * Traverses the trie from the current state for this byte sequence.
257     * Equivalent to
258     * <pre>
259     * Result result=current();
260     * for(each c in s)
261     *   if(!result.hasNext()) return Result.NO_MATCH;
262     *   result=next(c);
263     * return result;
264     * </pre>
265     * @param s Contains a string or byte sequence.
266     * @param sIndex The start index of the byte sequence in s.
267     * @param sLimit The (exclusive) end index of the byte sequence in s.
268     * @return The match/value Result.
269     * @stable ICU 4.8
270     */
271    public Result next(byte[] s, int sIndex, int sLimit) {
272        if(sIndex>=sLimit) {
273            // Empty input.
274            return current();
275        }
276        int pos=pos_;
277        if(pos<0) {
278            return Result.NO_MATCH;
279        }
280        int length=remainingMatchLength_;  // Actual remaining match length minus 1.
281        for(;;) {
282            // Fetch the next input byte, if there is one.
283            // Continue a linear-match node.
284            byte inByte;
285            for(;;) {
286                if(sIndex==sLimit) {
287                    remainingMatchLength_=length;
288                    pos_=pos;
289                    int node;
290                    return (length<0 && (node=(bytes_[pos]&0xff))>=kMinValueLead) ?
291                            valueResults_[node&kValueIsFinal] : Result.NO_VALUE;
292                }
293                inByte=s[sIndex++];
294                if(length<0) {
295                    remainingMatchLength_=length;
296                    break;
297                }
298                if(inByte!=bytes_[pos]) {
299                    stop();
300                    return Result.NO_MATCH;
301                }
302                ++pos;
303                --length;
304            }
305            for(;;) {
306                int node=bytes_[pos++]&0xff;
307                if(node<kMinLinearMatch) {
308                    Result result=branchNext(pos, node, inByte&0xff);
309                    if(result==Result.NO_MATCH) {
310                        return Result.NO_MATCH;
311                    }
312                    // Fetch the next input byte, if there is one.
313                    if(sIndex==sLimit) {
314                        return result;
315                    }
316                    if(result==Result.FINAL_VALUE) {
317                        // No further matching bytes.
318                        stop();
319                        return Result.NO_MATCH;
320                    }
321                    inByte=s[sIndex++];
322                    pos=pos_;  // branchNext() advanced pos and wrote it to pos_ .
323                } else if(node<kMinValueLead) {
324                    // Match length+1 bytes.
325                    length=node-kMinLinearMatch;  // Actual match length minus 1.
326                    if(inByte!=bytes_[pos]) {
327                        stop();
328                        return Result.NO_MATCH;
329                    }
330                    ++pos;
331                    --length;
332                    break;
333                } else if((node&kValueIsFinal)!=0) {
334                    // No further matching bytes.
335                    stop();
336                    return Result.NO_MATCH;
337                } else {
338                    // Skip intermediate value.
339                    pos=skipValue(pos, node);
340                    // The next node must not also be a value node.
341                    assert((bytes_[pos]&0xff)<kMinValueLead);
342                }
343            }
344        }
345    }
346
347    /**
348     * Returns a matching byte sequence's value if called immediately after
349     * current()/first()/next() returned Result.INTERMEDIATE_VALUE or Result.FINAL_VALUE.
350     * getValue() can be called multiple times.
351     *
352     * Do not call getValue() after Result.NO_MATCH or Result.NO_VALUE!
353     * @return The value for the byte sequence so far.
354     * @stable ICU 4.8
355     */
356    public int getValue() /*const*/ {
357        int pos=pos_;
358        int leadByte=bytes_[pos++]&0xff;
359        assert(leadByte>=kMinValueLead);
360        return readValue(bytes_, pos, leadByte>>1);
361    }
362
363    /**
364     * Determines whether all byte sequences reachable from the current state
365     * map to the same value, and if so, returns that value.
366     * @return The unique value in bits 32..1 with bit 0 set,
367     *         if all byte sequences reachable from the current state
368     *         map to the same value; otherwise returns 0.
369     * @stable ICU 4.8
370     */
371    public long getUniqueValue() /*const*/ {
372        int pos=pos_;
373        if(pos<0) {
374            return 0;
375        }
376        // Skip the rest of a pending linear-match node.
377        long uniqueValue=findUniqueValue(bytes_, pos+remainingMatchLength_+1, 0);
378        // Ignore internally used bits 63..33; extend the actual value's sign bit from bit 32.
379        return (uniqueValue<<31)>>31;
380    }
381
382    /**
383     * Finds each byte which continues the byte sequence from the current state.
384     * That is, each byte b for which it would be next(b)!=Result.NO_MATCH now.
385     * @param out Each next byte is 0-extended to a char and appended to this object.
386     *            (Only uses the out.append(c) method.)
387     * @return The number of bytes which continue the byte sequence from here.
388     * @stable ICU 4.8
389     */
390    public int getNextBytes(Appendable out) /*const*/ {
391        int pos=pos_;
392        if(pos<0) {
393            return 0;
394        }
395        if(remainingMatchLength_>=0) {
396            append(out, bytes_[pos]&0xff);  // Next byte of a pending linear-match node.
397            return 1;
398        }
399        int node=bytes_[pos++]&0xff;
400        if(node>=kMinValueLead) {
401            if((node&kValueIsFinal)!=0) {
402                return 0;
403            } else {
404                pos=skipValue(pos, node);
405                node=bytes_[pos++]&0xff;
406                assert(node<kMinValueLead);
407            }
408        }
409        if(node<kMinLinearMatch) {
410            if(node==0) {
411                node=bytes_[pos++]&0xff;
412            }
413            getNextBranchBytes(bytes_, pos, ++node, out);
414            return node;
415        } else {
416            // First byte of the linear-match node.
417            append(out, bytes_[pos]&0xff);
418            return 1;
419        }
420    }
421
422    /**
423     * Iterates from the current state of this trie.
424     * @return A new BytesTrie.Iterator.
425     * @stable ICU 4.8
426     */
427    public Iterator iterator() {
428        return new Iterator(bytes_, pos_, remainingMatchLength_, 0);
429    }
430
431    /**
432     * Iterates from the current state of this trie.
433     * @param maxStringLength If 0, the iterator returns full strings/byte sequences.
434     *                        Otherwise, the iterator returns strings with this maximum length.
435     * @return A new BytesTrie.Iterator.
436     * @stable ICU 4.8
437     */
438    public Iterator iterator(int maxStringLength) {
439        return new Iterator(bytes_, pos_, remainingMatchLength_, maxStringLength);
440    }
441
442    /**
443     * Iterates from the root of a byte-serialized BytesTrie.
444     * @param trieBytes Bytes array that contains the serialized trie.
445     * @param offset Root offset of the trie in the array.
446     * @param maxStringLength If 0, the iterator returns full strings/byte sequences.
447     *                        Otherwise, the iterator returns strings with this maximum length.
448     * @return A new BytesTrie.Iterator.
449     * @stable ICU 4.8
450     */
451    public static Iterator iterator(byte[] trieBytes, int offset, int maxStringLength) {
452        return new Iterator(trieBytes, offset, -1, maxStringLength);
453    }
454
455    /**
456     * Return value type for the Iterator.
457     * @stable ICU 4.8
458     */
459    public static final class Entry {
460        private Entry(int capacity) {
461            bytes=new byte[capacity];
462        }
463
464        /**
465         * @return The length of the byte sequence.
466         * @stable ICU 4.8
467         */
468        public int bytesLength() { return length; }
469        /**
470         * Returns a byte of the byte sequence.
471         * @param index An index into the byte sequence.
472         * @return The index-th byte sequence byte.
473         * @stable ICU 4.8
474         */
475        public byte byteAt(int index) { return bytes[index]; }
476        /**
477         * Copies the byte sequence into a byte array.
478         * @param dest Destination byte array.
479         * @param destOffset Starting offset to where in dest the byte sequence is copied.
480         * @stable ICU 4.8
481         */
482        public void copyBytesTo(byte[] dest, int destOffset) {
483            System.arraycopy(bytes, 0, dest, destOffset, length);
484        }
485        /**
486         * @return The byte sequence as a read-only ByteBuffer.
487         * @stable ICU 4.8
488         */
489        public ByteBuffer bytesAsByteBuffer() {
490            return ByteBuffer.wrap(bytes, 0, length).asReadOnlyBuffer();
491        }
492
493        /**
494         * The value associated with the byte sequence.
495         * @stable ICU 4.8
496         */
497        public int value;
498
499        private void ensureCapacity(int len) {
500            if(bytes.length<len) {
501                byte[] newBytes=new byte[Math.min(2*bytes.length, 2*len)];
502                System.arraycopy(bytes, 0, newBytes, 0, length);
503                bytes=newBytes;
504            }
505        }
506        private void append(byte b) {
507            ensureCapacity(length+1);
508            bytes[length++]=b;
509        }
510        private void append(byte[] b, int off, int len) {
511            ensureCapacity(length+len);
512            System.arraycopy(b, off, bytes, length, len);
513            length+=len;
514        }
515        private void truncateString(int newLength) { length=newLength; }
516
517        private byte[] bytes;
518        private int length;
519    }
520
521    /**
522     * Iterator for all of the (byte sequence, value) pairs in a BytesTrie.
523     * @stable ICU 4.8
524     */
525    public static final class Iterator implements java.util.Iterator<Entry> {
526        private Iterator(byte[] trieBytes, int offset, int remainingMatchLength, int maxStringLength) {
527            bytes_=trieBytes;
528            pos_=initialPos_=offset;
529            remainingMatchLength_=initialRemainingMatchLength_=remainingMatchLength;
530            maxLength_=maxStringLength;
531            entry_=new Entry(maxLength_!=0 ? maxLength_ : 32);
532            int length=remainingMatchLength_;  // Actual remaining match length minus 1.
533            if(length>=0) {
534                // Pending linear-match node, append remaining bytes to entry_.
535                ++length;
536                if(maxLength_>0 && length>maxLength_) {
537                    length=maxLength_;  // This will leave remainingMatchLength>=0 as a signal.
538                }
539                entry_.append(bytes_, pos_, length);
540                pos_+=length;
541                remainingMatchLength_-=length;
542            }
543        }
544
545        /**
546         * Resets this iterator to its initial state.
547         * @return this
548         * @stable ICU 4.8
549         */
550        public Iterator reset() {
551            pos_=initialPos_;
552            remainingMatchLength_=initialRemainingMatchLength_;
553            int length=remainingMatchLength_+1;  // Remaining match length.
554            if(maxLength_>0 && length>maxLength_) {
555                length=maxLength_;
556            }
557            entry_.truncateString(length);
558            pos_+=length;
559            remainingMatchLength_-=length;
560            stack_.clear();
561            return this;
562        }
563
564        /**
565         * @return true if there are more elements.
566         * @stable ICU 4.8
567         */
568        public boolean hasNext() /*const*/ { return pos_>=0 || !stack_.isEmpty(); }
569
570        /**
571         * Finds the next (byte sequence, value) pair if there is one.
572         *
573         * If the byte sequence is truncated to the maximum length and does not
574         * have a real value, then the value is set to -1.
575         * In this case, this "not a real value" is indistinguishable from
576         * a real value of -1.
577         * @return An Entry with the string and value of the next element.
578         * @throws NoSuchElementException - iteration has no more elements.
579         * @stable ICU 4.8
580         */
581        public Entry next() {
582            int pos=pos_;
583            if(pos<0) {
584                if(stack_.isEmpty()) {
585                    throw new NoSuchElementException();
586                }
587                // Pop the state off the stack and continue with the next outbound edge of
588                // the branch node.
589                long top=stack_.remove(stack_.size()-1);
590                int length=(int)top;
591                pos=(int)(top>>32);
592                entry_.truncateString(length&0xffff);
593                length>>>=16;
594                if(length>1) {
595                    pos=branchNext(pos, length);
596                    if(pos<0) {
597                        return entry_;  // Reached a final value.
598                    }
599                } else {
600                    entry_.append(bytes_[pos++]);
601                }
602            }
603            if(remainingMatchLength_>=0) {
604                // We only get here if we started in a pending linear-match node
605                // with more than maxLength remaining bytes.
606                return truncateAndStop();
607            }
608            for(;;) {
609                int node=bytes_[pos++]&0xff;
610                if(node>=kMinValueLead) {
611                    // Deliver value for the byte sequence so far.
612                    boolean isFinal=(node&kValueIsFinal)!=0;
613                    entry_.value=readValue(bytes_, pos, node>>1);
614                    if(isFinal || (maxLength_>0 && entry_.length==maxLength_)) {
615                        pos_=-1;
616                    } else {
617                        pos_=skipValue(pos, node);
618                    }
619                    return entry_;
620                }
621                if(maxLength_>0 && entry_.length==maxLength_) {
622                    return truncateAndStop();
623                }
624                if(node<kMinLinearMatch) {
625                    if(node==0) {
626                        node=bytes_[pos++]&0xff;
627                    }
628                    pos=branchNext(pos, node+1);
629                    if(pos<0) {
630                        return entry_;  // Reached a final value.
631                    }
632                } else {
633                    // Linear-match node, append length bytes to entry_.
634                    int length=node-kMinLinearMatch+1;
635                    if(maxLength_>0 && entry_.length+length>maxLength_) {
636                        entry_.append(bytes_, pos, maxLength_-entry_.length);
637                        return truncateAndStop();
638                    }
639                    entry_.append(bytes_, pos, length);
640                    pos+=length;
641                }
642            }
643        }
644
645        /**
646         * Iterator.remove() is not supported.
647         * @throws UnsupportedOperationException (always)
648         * @stable ICU 4.8
649         */
650        public void remove() {
651            throw new UnsupportedOperationException();
652        }
653
654        private Entry truncateAndStop() {
655            pos_=-1;
656            entry_.value=-1;  // no real value for str
657            return entry_;
658        }
659
660        private int branchNext(int pos, int length) {
661            while(length>kMaxBranchLinearSubNodeLength) {
662                ++pos;  // ignore the comparison byte
663                // Push state for the greater-or-equal edge.
664                stack_.add(((long)skipDelta(bytes_, pos)<<32)|((length-(length>>1))<<16)|entry_.length);
665                // Follow the less-than edge.
666                length>>=1;
667                pos=jumpByDelta(bytes_, pos);
668            }
669            // List of key-value pairs where values are either final values or jump deltas.
670            // Read the first (key, value) pair.
671            byte trieByte=bytes_[pos++];
672            int node=bytes_[pos++]&0xff;
673            boolean isFinal=(node&kValueIsFinal)!=0;
674            int value=readValue(bytes_, pos, node>>1);
675            pos=skipValue(pos, node);
676            stack_.add(((long)pos<<32)|((length-1)<<16)|entry_.length);
677            entry_.append(trieByte);
678            if(isFinal) {
679                pos_=-1;
680                entry_.value=value;
681                return -1;
682            } else {
683                return pos+value;
684            }
685        }
686
687        private byte[] bytes_;
688        private int pos_;
689        private int initialPos_;
690        private int remainingMatchLength_;
691        private int initialRemainingMatchLength_;
692
693        private int maxLength_;
694        private Entry entry_;
695
696        // The stack stores longs for backtracking to another
697        // outbound edge of a branch node.
698        // Each long has the offset from bytes_ in bits 62..32,
699        // the entry_.stringLength() from before the node in bits 15..0,
700        // and the remaining branch length in bits 24..16. (Bits 31..25 are unused.)
701        // (We could store the remaining branch length minus 1 in bits 23..16 and not use bits 31..24,
702        // but the code looks more confusing that way.)
703        private ArrayList<Long> stack_=new ArrayList<Long>();
704    }
705
706    private void stop() {
707        pos_=-1;
708    }
709
710    // Reads a compact 32-bit integer.
711    // pos is already after the leadByte, and the lead byte is already shifted right by 1.
712    private static int readValue(byte[] bytes, int pos, int leadByte) {
713        int value;
714        if(leadByte<kMinTwoByteValueLead) {
715            value=leadByte-kMinOneByteValueLead;
716        } else if(leadByte<kMinThreeByteValueLead) {
717            value=((leadByte-kMinTwoByteValueLead)<<8)|(bytes[pos]&0xff);
718        } else if(leadByte<kFourByteValueLead) {
719            value=((leadByte-kMinThreeByteValueLead)<<16)|((bytes[pos]&0xff)<<8)|(bytes[pos+1]&0xff);
720        } else if(leadByte==kFourByteValueLead) {
721            value=((bytes[pos]&0xff)<<16)|((bytes[pos+1]&0xff)<<8)|(bytes[pos+2]&0xff);
722        } else {
723            value=(bytes[pos]<<24)|((bytes[pos+1]&0xff)<<16)|((bytes[pos+2]&0xff)<<8)|(bytes[pos+3]&0xff);
724        }
725        return value;
726    }
727    private static int skipValue(int pos, int leadByte) {
728        assert(leadByte>=kMinValueLead);
729        if(leadByte>=(kMinTwoByteValueLead<<1)) {
730            if(leadByte<(kMinThreeByteValueLead<<1)) {
731                ++pos;
732            } else if(leadByte<(kFourByteValueLead<<1)) {
733                pos+=2;
734            } else {
735                pos+=3+((leadByte>>1)&1);
736            }
737        }
738        return pos;
739    }
740    private static int skipValue(byte[] bytes, int pos) {
741        int leadByte=bytes[pos++]&0xff;
742        return skipValue(pos, leadByte);
743    }
744
745    // Reads a jump delta and jumps.
746    private static int jumpByDelta(byte[] bytes, int pos) {
747        int delta=bytes[pos++]&0xff;
748        if(delta<kMinTwoByteDeltaLead) {
749            // nothing to do
750        } else if(delta<kMinThreeByteDeltaLead) {
751            delta=((delta-kMinTwoByteDeltaLead)<<8)|(bytes[pos++]&0xff);
752        } else if(delta<kFourByteDeltaLead) {
753            delta=((delta-kMinThreeByteDeltaLead)<<16)|((bytes[pos]&0xff)<<8)|(bytes[pos+1]&0xff);
754            pos+=2;
755        } else if(delta==kFourByteDeltaLead) {
756            delta=((bytes[pos]&0xff)<<16)|((bytes[pos+1]&0xff)<<8)|(bytes[pos+2]&0xff);
757            pos+=3;
758        } else {
759            delta=(bytes[pos]<<24)|((bytes[pos+1]&0xff)<<16)|((bytes[pos+2]&0xff)<<8)|(bytes[pos+3]&0xff);
760            pos+=4;
761        }
762        return pos+delta;
763    }
764
765    private static int skipDelta(byte[] bytes, int pos) {
766        int delta=bytes[pos++]&0xff;
767        if(delta>=kMinTwoByteDeltaLead) {
768            if(delta<kMinThreeByteDeltaLead) {
769                ++pos;
770            } else if(delta<kFourByteDeltaLead) {
771                pos+=2;
772            } else {
773                pos+=3+(delta&1);
774            }
775        }
776        return pos;
777    }
778
779    private static Result[] valueResults_={ Result.INTERMEDIATE_VALUE, Result.FINAL_VALUE };
780
781    // Handles a branch node for both next(byte) and next(string).
782    private Result branchNext(int pos, int length, int inByte) {
783        // Branch according to the current byte.
784        if(length==0) {
785            length=bytes_[pos++]&0xff;
786        }
787        ++length;
788        // The length of the branch is the number of bytes to select from.
789        // The data structure encodes a binary search.
790        while(length>kMaxBranchLinearSubNodeLength) {
791            if(inByte<(bytes_[pos++]&0xff)) {
792                length>>=1;
793                pos=jumpByDelta(bytes_, pos);
794            } else {
795                length=length-(length>>1);
796                pos=skipDelta(bytes_, pos);
797            }
798        }
799        // Drop down to linear search for the last few bytes.
800        // length>=2 because the loop body above sees length>kMaxBranchLinearSubNodeLength>=3
801        // and divides length by 2.
802        do {
803            if(inByte==(bytes_[pos++]&0xff)) {
804                Result result;
805                int node=bytes_[pos]&0xff;
806                assert(node>=kMinValueLead);
807                if((node&kValueIsFinal)!=0) {
808                    // Leave the final value for getValue() to read.
809                    result=Result.FINAL_VALUE;
810                } else {
811                    // Use the non-final value as the jump delta.
812                    ++pos;
813                    // int delta=readValue(pos, node>>1);
814                    node>>=1;
815                    int delta;
816                    if(node<kMinTwoByteValueLead) {
817                        delta=node-kMinOneByteValueLead;
818                    } else if(node<kMinThreeByteValueLead) {
819                        delta=((node-kMinTwoByteValueLead)<<8)|(bytes_[pos++]&0xff);
820                    } else if(node<kFourByteValueLead) {
821                        delta=((node-kMinThreeByteValueLead)<<16)|((bytes_[pos]&0xff)<<8)|(bytes_[pos+1]&0xff);
822                        pos+=2;
823                    } else if(node==kFourByteValueLead) {
824                        delta=((bytes_[pos]&0xff)<<16)|((bytes_[pos+1]&0xff)<<8)|(bytes_[pos+2]&0xff);
825                        pos+=3;
826                    } else {
827                        delta=(bytes_[pos]<<24)|((bytes_[pos+1]&0xff)<<16)|((bytes_[pos+2]&0xff)<<8)|(bytes_[pos+3]&0xff);
828                        pos+=4;
829                    }
830                    // end readValue()
831                    pos+=delta;
832                    node=bytes_[pos]&0xff;
833                    result= node>=kMinValueLead ? valueResults_[node&kValueIsFinal] : Result.NO_VALUE;
834                }
835                pos_=pos;
836                return result;
837            }
838            --length;
839            pos=skipValue(bytes_, pos);
840        } while(length>1);
841        if(inByte==(bytes_[pos++]&0xff)) {
842            pos_=pos;
843            int node=bytes_[pos]&0xff;
844            return node>=kMinValueLead ? valueResults_[node&kValueIsFinal] : Result.NO_VALUE;
845        } else {
846            stop();
847            return Result.NO_MATCH;
848        }
849    }
850
851    // Requires remainingLength_<0.
852    private Result nextImpl(int pos, int inByte) {
853        for(;;) {
854            int node=bytes_[pos++]&0xff;
855            if(node<kMinLinearMatch) {
856                return branchNext(pos, node, inByte);
857            } else if(node<kMinValueLead) {
858                // Match the first of length+1 bytes.
859                int length=node-kMinLinearMatch;  // Actual match length minus 1.
860                if(inByte==(bytes_[pos++]&0xff)) {
861                    remainingMatchLength_=--length;
862                    pos_=pos;
863                    return (length<0 && (node=bytes_[pos]&0xff)>=kMinValueLead) ?
864                            valueResults_[node&kValueIsFinal] : Result.NO_VALUE;
865                } else {
866                    // No match.
867                    break;
868                }
869            } else if((node&kValueIsFinal)!=0) {
870                // No further matching bytes.
871                break;
872            } else {
873                // Skip intermediate value.
874                pos=skipValue(pos, node);
875                // The next node must not also be a value node.
876                assert((bytes_[pos]&0xff)<kMinValueLead);
877            }
878        }
879        stop();
880        return Result.NO_MATCH;
881    }
882
883    // Helper functions for getUniqueValue().
884    // Recursively finds a unique value (or whether there is not a unique one)
885    // from a branch.
886    // uniqueValue: On input, same as for getUniqueValue()/findUniqueValue().
887    // On return, if not 0, then bits 63..33 contain the updated non-negative pos.
888    private static long findUniqueValueFromBranch(byte[] bytes, int pos, int length,
889                                                  long uniqueValue) {
890        while(length>kMaxBranchLinearSubNodeLength) {
891            ++pos;  // ignore the comparison byte
892            uniqueValue=findUniqueValueFromBranch(bytes, jumpByDelta(bytes, pos), length>>1, uniqueValue);
893            if(uniqueValue==0) {
894                return 0;
895            }
896            length=length-(length>>1);
897            pos=skipDelta(bytes, pos);
898        }
899        do {
900            ++pos;  // ignore a comparison byte
901            // handle its value
902            int node=bytes[pos++]&0xff;
903            boolean isFinal=(node&kValueIsFinal)!=0;
904            int value=readValue(bytes, pos, node>>1);
905            pos=skipValue(pos, node);
906            if(isFinal) {
907                if(uniqueValue!=0) {
908                    if(value!=(int)(uniqueValue>>1)) {
909                        return 0;
910                    }
911                } else {
912                    uniqueValue=((long)value<<1)|1;
913                }
914            } else {
915                uniqueValue=findUniqueValue(bytes, pos+value, uniqueValue);
916                if(uniqueValue==0) {
917                    return 0;
918                }
919            }
920        } while(--length>1);
921        // ignore the last comparison byte
922        return ((long)(pos+1)<<33)|(uniqueValue&0x1ffffffffL);
923    }
924    // Recursively finds a unique value (or whether there is not a unique one)
925    // starting from a position on a node lead byte.
926    // uniqueValue: If there is one, then bits 32..1 contain the value and bit 0 is set.
927    // Otherwise, uniqueValue is 0. Bits 63..33 are ignored.
928    private static long findUniqueValue(byte[] bytes, int pos, long uniqueValue) {
929        for(;;) {
930            int node=bytes[pos++]&0xff;
931            if(node<kMinLinearMatch) {
932                if(node==0) {
933                    node=bytes[pos++]&0xff;
934                }
935                uniqueValue=findUniqueValueFromBranch(bytes, pos, node+1, uniqueValue);
936                if(uniqueValue==0) {
937                    return 0;
938                }
939                pos=(int)(uniqueValue>>>33);
940            } else if(node<kMinValueLead) {
941                // linear-match node
942                pos+=node-kMinLinearMatch+1;  // Ignore the match bytes.
943            } else {
944                boolean isFinal=(node&kValueIsFinal)!=0;
945                int value=readValue(bytes, pos, node>>1);
946                if(uniqueValue!=0) {
947                    if(value!=(int)(uniqueValue>>1)) {
948                        return 0;
949                    }
950                } else {
951                    uniqueValue=((long)value<<1)|1;
952                }
953                if(isFinal) {
954                    return uniqueValue;
955                }
956                pos=skipValue(pos, node);
957            }
958        }
959    }
960
961    // Helper functions for getNextBytes().
962    // getNextBytes() when pos is on a branch node.
963    private static void getNextBranchBytes(byte[] bytes, int pos, int length, Appendable out) {
964        while(length>kMaxBranchLinearSubNodeLength) {
965            ++pos;  // ignore the comparison byte
966            getNextBranchBytes(bytes, jumpByDelta(bytes, pos), length>>1, out);
967            length=length-(length>>1);
968            pos=skipDelta(bytes, pos);
969        }
970        do {
971            append(out, bytes[pos++]&0xff);
972            pos=skipValue(bytes, pos);
973        } while(--length>1);
974        append(out, bytes[pos]&0xff);
975    }
976    private static void append(Appendable out, int c) {
977        try {
978            out.append((char)c);
979        } catch(IOException e) {
980            throw new ICUUncheckedIOException(e);
981        }
982    }
983
984    // BytesTrie data structure
985    //
986    // The trie consists of a series of byte-serialized nodes for incremental
987    // string/byte sequence matching. The root node is at the beginning of the trie data.
988    //
989    // Types of nodes are distinguished by their node lead byte ranges.
990    // After each node, except a final-value node, another node follows to
991    // encode match values or continue matching further bytes.
992    //
993    // Node types:
994    //  - Value node: Stores a 32-bit integer in a compact, variable-length format.
995    //    The value is for the string/byte sequence so far.
996    //    One node bit indicates whether the value is final or whether
997    //    matching continues with the next node.
998    //  - Linear-match node: Matches a number of bytes.
999    //  - Branch node: Branches to other nodes according to the current input byte.
1000    //    The node byte is the length of the branch (number of bytes to select from)
1001    //    minus 1. It is followed by a sub-node:
1002    //    - If the length is at most kMaxBranchLinearSubNodeLength, then
1003    //      there are length-1 (key, value) pairs and then one more comparison byte.
1004    //      If one of the key bytes matches, then the value is either a final value for
1005    //      the string/byte sequence so far, or a "jump" delta to the next node.
1006    //      If the last byte matches, then matching continues with the next node.
1007    //      (Values have the same encoding as value nodes.)
1008    //    - If the length is greater than kMaxBranchLinearSubNodeLength, then
1009    //      there is one byte and one "jump" delta.
1010    //      If the input byte is less than the sub-node byte, then "jump" by delta to
1011    //      the next sub-node which will have a length of length/2.
1012    //      (The delta has its own compact encoding.)
1013    //      Otherwise, skip the "jump" delta to the next sub-node
1014    //      which will have a length of length-length/2.
1015
1016    // Node lead byte values.
1017
1018    // 00..0f: Branch node. If node!=0 then the length is node+1, otherwise
1019    // the length is one more than the next byte.
1020
1021    // For a branch sub-node with at most this many entries, we drop down
1022    // to a linear search.
1023    /*package*/ static final int kMaxBranchLinearSubNodeLength=5;
1024
1025    // 10..1f: Linear-match node, match 1..16 bytes and continue reading the next node.
1026    /*package*/ static final int kMinLinearMatch=0x10;
1027    /*package*/ static final int kMaxLinearMatchLength=0x10;
1028
1029    // 20..ff: Variable-length value node.
1030    // If odd, the value is final. (Otherwise, intermediate value or jump delta.)
1031    // Then shift-right by 1 bit.
1032    // The remaining lead byte value indicates the number of following bytes (0..4)
1033    // and contains the value's top bits.
1034    /*package*/ static final int kMinValueLead=kMinLinearMatch+kMaxLinearMatchLength;  // 0x20
1035    // It is a final value if bit 0 is set.
1036    private static final int kValueIsFinal=1;
1037
1038    // Compact value: After testing bit 0, shift right by 1 and then use the following thresholds.
1039    /*package*/ static final int kMinOneByteValueLead=kMinValueLead/2;  // 0x10
1040    /*package*/ static final int kMaxOneByteValue=0x40;  // At least 6 bits in the first byte.
1041
1042    /*package*/ static final int kMinTwoByteValueLead=kMinOneByteValueLead+kMaxOneByteValue+1;  // 0x51
1043    /*package*/ static final int kMaxTwoByteValue=0x1aff;
1044
1045    /*package*/ static final int kMinThreeByteValueLead=kMinTwoByteValueLead+(kMaxTwoByteValue>>8)+1;  // 0x6c
1046    /*package*/ static final int kFourByteValueLead=0x7e;
1047
1048    // A little more than Unicode code points. (0x11ffff)
1049    /*package*/ static final int kMaxThreeByteValue=((kFourByteValueLead-kMinThreeByteValueLead)<<16)-1;
1050
1051    /*package*/ static final int kFiveByteValueLead=0x7f;
1052
1053    // Compact delta integers.
1054    /*package*/ static final int kMaxOneByteDelta=0xbf;
1055    /*package*/ static final int kMinTwoByteDeltaLead=kMaxOneByteDelta+1;  // 0xc0
1056    /*package*/ static final int kMinThreeByteDeltaLead=0xf0;
1057    /*package*/ static final int kFourByteDeltaLead=0xfe;
1058    /*package*/ static final int kFiveByteDeltaLead=0xff;
1059
1060    /*package*/ static final int kMaxTwoByteDelta=((kMinThreeByteDeltaLead-kMinTwoByteDeltaLead)<<8)-1;  // 0x2fff
1061    /*package*/ static final int kMaxThreeByteDelta=((kFourByteDeltaLead-kMinThreeByteDeltaLead)<<16)-1;  // 0xdffff
1062
1063    // Fixed value referencing the BytesTrie bytes.
1064    private byte[] bytes_;
1065    private int root_;
1066
1067    // Iterator variables.
1068
1069    // Index of next trie byte to read. Negative if no more matches.
1070    private int pos_;
1071    // Remaining length of a linear-match node, minus 1. Negative if not in such a node.
1072    private int remainingMatchLength_;
1073};
1074