1/* GENERATED SOURCE. DO NOT MODIFY. */
2// © 2016 and later: Unicode, Inc. and others.
3// License & terms of use: http://www.unicode.org/copyright.html#License
4/*
5*******************************************************************************
6*   Copyright (C) 2011-2014, International Business Machines
7*   Corporation and others.  All Rights Reserved.
8*******************************************************************************
9*   created on: 2011jan06
10*   created by: Markus W. Scherer
11*   ported from ICU4C ucharstrie.h/.cpp
12*/
13
14package android.icu.util;
15
16import java.io.IOException;
17import java.util.ArrayList;
18import java.util.NoSuchElementException;
19
20import android.icu.text.UTF16;
21import android.icu.util.BytesTrie.Result;
22
23/**
24 * Light-weight, non-const reader class for a CharsTrie.
25 * Traverses a char-serialized data structure with minimal state,
26 * for mapping strings (16-bit-unit sequences) to non-negative integer values.
27 *
28 * <p>This class is not intended for public subclassing.
29 *
30 * @author Markus W. Scherer
31 * @hide Only a subset of ICU is exposed in Android
32 */
33public final class CharsTrie implements Cloneable, Iterable<CharsTrie.Entry> {
34    /**
35     * Constructs a CharsTrie reader instance.
36     *
37     * <p>The CharSequence must contain a copy of a char sequence from the CharsTrieBuilder,
38     * with the offset indicating the first char of that sequence.
39     * The CharsTrie object will not read more chars than
40     * the CharsTrieBuilder generated in the corresponding build() call.
41     *
42     * <p>The CharSequence is not copied/cloned and must not be modified while
43     * the CharsTrie object is in use.
44     *
45     * @param trieChars CharSequence that contains the serialized trie.
46     * @param offset Root offset of the trie in the CharSequence.
47     */
48    public CharsTrie(CharSequence trieChars, int offset) {
49        chars_=trieChars;
50        pos_=root_=offset;
51        remainingMatchLength_=-1;
52    }
53
54    /**
55     * Clones this trie reader object and its state,
56     * but not the char array which will be shared.
57     * @return A shallow clone of this trie.
58     */
59    @Override
60    public Object clone() throws CloneNotSupportedException {
61        return super.clone();  // A shallow copy is just what we need.
62    }
63
64    /**
65     * Resets this trie to its initial state.
66     * @return this
67     */
68    public CharsTrie reset() {
69        pos_=root_;
70        remainingMatchLength_=-1;
71        return this;
72    }
73
74    /**
75     * CharsTrie state object, for saving a trie's current state
76     * and resetting the trie back to this state later.
77     */
78    public static final class State {
79        /**
80         * Constructs an empty State.
81         */
82        public State() {}
83        private CharSequence chars;
84        private int root;
85        private int pos;
86        private int remainingMatchLength;
87    }
88
89    /**
90     * Saves the state of this trie.
91     * @param state The State object to hold the trie's state.
92     * @return this
93     * @see #resetToState
94     */
95    public CharsTrie saveState(State state) /*const*/ {
96        state.chars=chars_;
97        state.root=root_;
98        state.pos=pos_;
99        state.remainingMatchLength=remainingMatchLength_;
100        return this;
101    }
102
103    /**
104     * Resets this trie to the saved state.
105     * @param state The State object which holds a saved trie state.
106     * @return this
107     * @throws IllegalArgumentException if the state object contains no state,
108     *         or the state of a different trie
109     * @see #saveState
110     * @see #reset
111     */
112    public CharsTrie resetToState(State state) {
113        if(chars_==state.chars && chars_!=null && root_==state.root) {
114            pos_=state.pos;
115            remainingMatchLength_=state.remainingMatchLength;
116        } else {
117            throw new IllegalArgumentException("incompatible trie state");
118        }
119        return this;
120    }
121
122    /**
123     * Determines whether the string so far matches, whether it has a value,
124     * and whether another input char can continue a matching string.
125     * @return The match/value Result.
126     */
127    public Result current() /*const*/ {
128        int pos=pos_;
129        if(pos<0) {
130            return Result.NO_MATCH;
131        } else {
132            int node;
133            return (remainingMatchLength_<0 && (node=chars_.charAt(pos))>=kMinValueLead) ?
134                    valueResults_[node>>15] : Result.NO_VALUE;
135        }
136    }
137
138    /**
139     * Traverses the trie from the initial state for this input char.
140     * Equivalent to reset().next(inUnit).
141     * @param inUnit Input char value. Values below 0 and above 0xffff will never match.
142     * @return The match/value Result.
143     */
144    public Result first(int inUnit) {
145        remainingMatchLength_=-1;
146        return nextImpl(root_, inUnit);
147    }
148
149    /**
150     * Traverses the trie from the initial state for the
151     * one or two UTF-16 code units for this input code point.
152     * Equivalent to reset().nextForCodePoint(cp).
153     * @param cp A Unicode code point 0..0x10ffff.
154     * @return The match/value Result.
155     */
156    public Result firstForCodePoint(int cp) {
157        return cp<=0xffff ?
158            first(cp) :
159            (first(UTF16.getLeadSurrogate(cp)).hasNext() ?
160                next(UTF16.getTrailSurrogate(cp)) :
161                Result.NO_MATCH);
162    }
163
164    /**
165     * Traverses the trie from the current state for this input char.
166     * @param inUnit Input char value. Values below 0 and above 0xffff will never match.
167     * @return The match/value Result.
168     */
169    public Result next(int inUnit) {
170        int pos=pos_;
171        if(pos<0) {
172            return Result.NO_MATCH;
173        }
174        int length=remainingMatchLength_;  // Actual remaining match length minus 1.
175        if(length>=0) {
176            // Remaining part of a linear-match node.
177            if(inUnit==chars_.charAt(pos++)) {
178                remainingMatchLength_=--length;
179                pos_=pos;
180                int node;
181                return (length<0 && (node=chars_.charAt(pos))>=kMinValueLead) ?
182                        valueResults_[node>>15] : Result.NO_VALUE;
183            } else {
184                stop();
185                return Result.NO_MATCH;
186            }
187        }
188        return nextImpl(pos, inUnit);
189    }
190
191    /**
192     * Traverses the trie from the current state for the
193     * one or two UTF-16 code units for this input code point.
194     * @param cp A Unicode code point 0..0x10ffff.
195     * @return The match/value Result.
196     */
197    public Result nextForCodePoint(int cp) {
198        return cp<=0xffff ?
199            next(cp) :
200            (next(UTF16.getLeadSurrogate(cp)).hasNext() ?
201                next(UTF16.getTrailSurrogate(cp)) :
202                Result.NO_MATCH);
203    }
204
205    /**
206     * Traverses the trie from the current state for this string.
207     * Equivalent to
208     * <pre>
209     * Result result=current();
210     * for(each c in s)
211     *   if(!result.hasNext()) return Result.NO_MATCH;
212     *   result=next(c);
213     * return result;
214     * </pre>
215     * @param s Contains a string.
216     * @param sIndex The start index of the string in s.
217     * @param sLimit The (exclusive) end index of the string in s.
218     * @return The match/value Result.
219     */
220    public Result next(CharSequence s, int sIndex, int sLimit) {
221        if(sIndex>=sLimit) {
222            // Empty input.
223            return current();
224        }
225        int pos=pos_;
226        if(pos<0) {
227            return Result.NO_MATCH;
228        }
229        int length=remainingMatchLength_;  // Actual remaining match length minus 1.
230        for(;;) {
231            // Fetch the next input unit, if there is one.
232            // Continue a linear-match node.
233            char inUnit;
234            for(;;) {
235                if(sIndex==sLimit) {
236                    remainingMatchLength_=length;
237                    pos_=pos;
238                    int node;
239                    return (length<0 && (node=chars_.charAt(pos))>=kMinValueLead) ?
240                            valueResults_[node>>15] : Result.NO_VALUE;
241                }
242                inUnit=s.charAt(sIndex++);
243                if(length<0) {
244                    remainingMatchLength_=length;
245                    break;
246                }
247                if(inUnit!=chars_.charAt(pos)) {
248                    stop();
249                    return Result.NO_MATCH;
250                }
251                ++pos;
252                --length;
253            }
254            int node=chars_.charAt(pos++);
255            for(;;) {
256                if(node<kMinLinearMatch) {
257                    Result result=branchNext(pos, node, inUnit);
258                    if(result==Result.NO_MATCH) {
259                        return Result.NO_MATCH;
260                    }
261                    // Fetch the next input unit, if there is one.
262                    if(sIndex==sLimit) {
263                        return result;
264                    }
265                    if(result==Result.FINAL_VALUE) {
266                        // No further matching units.
267                        stop();
268                        return Result.NO_MATCH;
269                    }
270                    inUnit=s.charAt(sIndex++);
271                    pos=pos_;  // branchNext() advanced pos and wrote it to pos_ .
272                    node=chars_.charAt(pos++);
273                } else if(node<kMinValueLead) {
274                    // Match length+1 units.
275                    length=node-kMinLinearMatch;  // Actual match length minus 1.
276                    if(inUnit!=chars_.charAt(pos)) {
277                        stop();
278                        return Result.NO_MATCH;
279                    }
280                    ++pos;
281                    --length;
282                    break;
283                } else if((node&kValueIsFinal)!=0) {
284                    // No further matching units.
285                    stop();
286                    return Result.NO_MATCH;
287                } else {
288                    // Skip intermediate value.
289                    pos=skipNodeValue(pos, node);
290                    node&=kNodeTypeMask;
291                }
292            }
293        }
294    }
295
296    /**
297     * Returns a matching string's value if called immediately after
298     * current()/first()/next() returned Result.INTERMEDIATE_VALUE or Result.FINAL_VALUE.
299     * getValue() can be called multiple times.
300     *
301     * Do not call getValue() after Result.NO_MATCH or Result.NO_VALUE!
302     * @return The value for the string so far.
303     */
304    public int getValue() /*const*/ {
305        int pos=pos_;
306        int leadUnit=chars_.charAt(pos++);
307        assert(leadUnit>=kMinValueLead);
308        return (leadUnit&kValueIsFinal)!=0 ?
309            readValue(chars_, pos, leadUnit&0x7fff) : readNodeValue(chars_, pos, leadUnit);
310    }
311
312    /**
313     * Determines whether all strings reachable from the current state
314     * map to the same value, and if so, returns that value.
315     * @return The unique value in bits 32..1 with bit 0 set,
316     *         if all strings reachable from the current state
317     *         map to the same value; otherwise returns 0.
318     */
319    public long getUniqueValue() /*const*/ {
320        int pos=pos_;
321        if(pos<0) {
322            return 0;
323        }
324        // Skip the rest of a pending linear-match node.
325        long uniqueValue=findUniqueValue(chars_, pos+remainingMatchLength_+1, 0);
326        // Ignore internally used bits 63..33; extend the actual value's sign bit from bit 32.
327        return (uniqueValue<<31)>>31;
328    }
329
330    /**
331     * Finds each char which continues the string from the current state.
332     * That is, each char c for which it would be next(c)!=Result.NO_MATCH now.
333     * @param out Each next char is appended to this object.
334     *            (Only uses the out.append(c) method.)
335     * @return The number of chars which continue the string from here.
336     */
337    public int getNextChars(Appendable out) /*const*/ {
338        int pos=pos_;
339        if(pos<0) {
340            return 0;
341        }
342        if(remainingMatchLength_>=0) {
343            append(out, chars_.charAt(pos));  // Next unit of a pending linear-match node.
344            return 1;
345        }
346        int node=chars_.charAt(pos++);
347        if(node>=kMinValueLead) {
348            if((node&kValueIsFinal)!=0) {
349                return 0;
350            } else {
351                pos=skipNodeValue(pos, node);
352                node&=kNodeTypeMask;
353            }
354        }
355        if(node<kMinLinearMatch) {
356            if(node==0) {
357                node=chars_.charAt(pos++);
358            }
359            getNextBranchChars(chars_, pos, ++node, out);
360            return node;
361        } else {
362            // First unit of the linear-match node.
363            append(out, chars_.charAt(pos));
364            return 1;
365        }
366    }
367
368    /**
369     * Iterates from the current state of this trie.
370     * @return A new CharsTrie.Iterator.
371     */
372    @Override
373    public Iterator iterator() {
374        return new Iterator(chars_, pos_, remainingMatchLength_, 0);
375    }
376
377    /**
378     * Iterates from the current state of this trie.
379     * @param maxStringLength If 0, the iterator returns full strings.
380     *                        Otherwise, the iterator returns strings with this maximum length.
381     * @return A new CharsTrie.Iterator.
382     */
383    public Iterator iterator(int maxStringLength) {
384        return new Iterator(chars_, pos_, remainingMatchLength_, maxStringLength);
385    }
386
387    /**
388     * Iterates from the root of a char-serialized BytesTrie.
389     * @param trieChars CharSequence that contains the serialized trie.
390     * @param offset Root offset of the trie in the CharSequence.
391     * @param maxStringLength If 0, the iterator returns full strings.
392     *                        Otherwise, the iterator returns strings with this maximum length.
393     * @return A new CharsTrie.Iterator.
394     */
395    public static Iterator iterator(CharSequence trieChars, int offset, int maxStringLength) {
396        return new Iterator(trieChars, offset, -1, maxStringLength);
397    }
398
399    /**
400     * Return value type for the Iterator.
401     */
402    public static final class Entry {
403        /**
404         * The string.
405         */
406        public CharSequence chars;
407        /**
408         * The value associated with the string.
409         */
410        public int value;
411
412        private Entry() {
413        }
414    }
415
416    /**
417     * Iterator for all of the (string, value) pairs in a CharsTrie.
418     */
419    public static final class Iterator implements java.util.Iterator<Entry> {
420        private Iterator(CharSequence trieChars, int offset, int remainingMatchLength, int maxStringLength) {
421            chars_=trieChars;
422            pos_=initialPos_=offset;
423            remainingMatchLength_=initialRemainingMatchLength_=remainingMatchLength;
424            maxLength_=maxStringLength;
425            int length=remainingMatchLength_;  // Actual remaining match length minus 1.
426            if(length>=0) {
427                // Pending linear-match node, append remaining bytes to str_.
428                ++length;
429                if(maxLength_>0 && length>maxLength_) {
430                    length=maxLength_;  // This will leave remainingMatchLength>=0 as a signal.
431                }
432                str_.append(chars_, pos_, pos_+length);
433                pos_+=length;
434                remainingMatchLength_-=length;
435            }
436        }
437
438        /**
439         * Resets this iterator to its initial state.
440         * @return this
441         */
442        public Iterator reset() {
443            pos_=initialPos_;
444            remainingMatchLength_=initialRemainingMatchLength_;
445            skipValue_=false;
446            int length=remainingMatchLength_+1;  // Remaining match length.
447            if(maxLength_>0 && length>maxLength_) {
448                length=maxLength_;
449            }
450            str_.setLength(length);
451            pos_+=length;
452            remainingMatchLength_-=length;
453            stack_.clear();
454            return this;
455        }
456
457        /**
458         * @return true if there are more elements.
459         */
460        @Override
461        public boolean hasNext() /*const*/ { return pos_>=0 || !stack_.isEmpty(); }
462
463        /**
464         * Finds the next (string, value) pair if there is one.
465         *
466         * If the string is truncated to the maximum length and does not
467         * have a real value, then the value is set to -1.
468         * In this case, this "not a real value" is indistinguishable from
469         * a real value of -1.
470         * @return An Entry with the string and value of the next element.
471         * @throws NoSuchElementException - iteration has no more elements.
472         */
473        @Override
474        public Entry next() {
475            int pos=pos_;
476            if(pos<0) {
477                if(stack_.isEmpty()) {
478                    throw new NoSuchElementException();
479                }
480                // Pop the state off the stack and continue with the next outbound edge of
481                // the branch node.
482                long top=stack_.remove(stack_.size()-1);
483                int length=(int)top;
484                pos=(int)(top>>32);
485                str_.setLength(length&0xffff);
486                length>>>=16;
487                if(length>1) {
488                    pos=branchNext(pos, length);
489                    if(pos<0) {
490                        return entry_;  // Reached a final value.
491                    }
492                } else {
493                    str_.append(chars_.charAt(pos++));
494                }
495            }
496            if(remainingMatchLength_>=0) {
497                // We only get here if we started in a pending linear-match node
498                // with more than maxLength remaining units.
499                return truncateAndStop();
500            }
501            for(;;) {
502                int node=chars_.charAt(pos++);
503                if(node>=kMinValueLead) {
504                    if(skipValue_) {
505                        pos=skipNodeValue(pos, node);
506                        node&=kNodeTypeMask;
507                        skipValue_=false;
508                    } else {
509                        // Deliver value for the string so far.
510                        boolean isFinal=(node&kValueIsFinal)!=0;
511                        if(isFinal) {
512                            entry_.value=readValue(chars_, pos, node&0x7fff);
513                        } else {
514                            entry_.value=readNodeValue(chars_, pos, node);
515                        }
516                        if(isFinal || (maxLength_>0 && str_.length()==maxLength_)) {
517                            pos_=-1;
518                        } else {
519                            // We cannot skip the value right here because it shares its
520                            // lead unit with a match node which we have to evaluate
521                            // next time.
522                            // Instead, keep pos_ on the node lead unit itself.
523                            pos_=pos-1;
524                            skipValue_=true;
525                        }
526                        entry_.chars=str_;
527                        return entry_;
528                    }
529                }
530                if(maxLength_>0 && str_.length()==maxLength_) {
531                    return truncateAndStop();
532                }
533                if(node<kMinLinearMatch) {
534                    if(node==0) {
535                        node=chars_.charAt(pos++);
536                    }
537                    pos=branchNext(pos, node+1);
538                    if(pos<0) {
539                        return entry_;  // Reached a final value.
540                    }
541                } else {
542                    // Linear-match node, append length units to str_.
543                    int length=node-kMinLinearMatch+1;
544                    if(maxLength_>0 && str_.length()+length>maxLength_) {
545                        str_.append(chars_, pos, pos+maxLength_-str_.length());
546                        return truncateAndStop();
547                    }
548                    str_.append(chars_, pos, pos+length);
549                    pos+=length;
550                }
551            }
552        }
553
554        /**
555         * Iterator.remove() is not supported.
556         * @throws UnsupportedOperationException (always)
557         */
558        @Override
559        public void remove() {
560            throw new UnsupportedOperationException();
561        }
562
563        private Entry truncateAndStop() {
564            pos_=-1;
565            // We reset entry_.chars every time we return entry_
566            // just because the caller might have modified the Entry.
567            entry_.chars=str_;
568            entry_.value=-1;  // no real value for str
569            return entry_;
570        }
571
572        private int branchNext(int pos, int length) {
573            while(length>kMaxBranchLinearSubNodeLength) {
574                ++pos;  // ignore the comparison unit
575                // Push state for the greater-or-equal edge.
576                stack_.add(((long)skipDelta(chars_, pos)<<32)|((length-(length>>1))<<16)|str_.length());
577                // Follow the less-than edge.
578                length>>=1;
579                pos=jumpByDelta(chars_, pos);
580            }
581            // List of key-value pairs where values are either final values or jump deltas.
582            // Read the first (key, value) pair.
583            char trieUnit=chars_.charAt(pos++);
584            int node=chars_.charAt(pos++);
585            boolean isFinal=(node&kValueIsFinal)!=0;
586            int value=readValue(chars_, pos, node&=0x7fff);
587            pos=skipValue(pos, node);
588            stack_.add(((long)pos<<32)|((length-1)<<16)|str_.length());
589            str_.append(trieUnit);
590            if(isFinal) {
591                pos_=-1;
592                entry_.chars=str_;
593                entry_.value=value;
594                return -1;
595            } else {
596                return pos+value;
597            }
598        }
599
600        private CharSequence chars_;
601        private int pos_;
602        private int initialPos_;
603        private int remainingMatchLength_;
604        private int initialRemainingMatchLength_;
605        private boolean skipValue_;  // Skip intermediate value which was already delivered.
606
607        private StringBuilder str_=new StringBuilder();
608        private int maxLength_;
609        private Entry entry_=new Entry();
610
611        // The stack stores longs for backtracking to another
612        // outbound edge of a branch node.
613        // Each long has the offset in chars_ in bits 62..32,
614        // the str_.length() from before the node in bits 15..0,
615        // and the remaining branch length in bits 31..16.
616        // (We could store the remaining branch length minus 1 in bits 30..16 and not use bit 31,
617        // but the code looks more confusing that way.)
618        private ArrayList<Long> stack_=new ArrayList<Long>();
619    }
620
621    private void stop() {
622        pos_=-1;
623    }
624
625    // Reads a compact 32-bit integer.
626    // pos is already after the leadUnit, and the lead unit has bit 15 reset.
627    private static int readValue(CharSequence chars, int pos, int leadUnit) {
628        int value;
629        if(leadUnit<kMinTwoUnitValueLead) {
630            value=leadUnit;
631        } else if(leadUnit<kThreeUnitValueLead) {
632            value=((leadUnit-kMinTwoUnitValueLead)<<16)|chars.charAt(pos);
633        } else {
634            value=(chars.charAt(pos)<<16)|chars.charAt(pos+1);
635        }
636        return value;
637    }
638    private static int skipValue(int pos, int leadUnit) {
639        if(leadUnit>=kMinTwoUnitValueLead) {
640            if(leadUnit<kThreeUnitValueLead) {
641                ++pos;
642            } else {
643                pos+=2;
644            }
645        }
646        return pos;
647    }
648    private static int skipValue(CharSequence chars, int pos) {
649        int leadUnit=chars.charAt(pos++);
650        return skipValue(pos, leadUnit&0x7fff);
651    }
652
653    private static int readNodeValue(CharSequence chars, int pos, int leadUnit) {
654        assert(kMinValueLead<=leadUnit && leadUnit<kValueIsFinal);
655        int value;
656        if(leadUnit<kMinTwoUnitNodeValueLead) {
657            value=(leadUnit>>6)-1;
658        } else if(leadUnit<kThreeUnitNodeValueLead) {
659            value=(((leadUnit&0x7fc0)-kMinTwoUnitNodeValueLead)<<10)|chars.charAt(pos);
660        } else {
661            value=(chars.charAt(pos)<<16)|chars.charAt(pos+1);
662        }
663        return value;
664    }
665    private static int skipNodeValue(int pos, int leadUnit) {
666        assert(kMinValueLead<=leadUnit && leadUnit<kValueIsFinal);
667        if(leadUnit>=kMinTwoUnitNodeValueLead) {
668            if(leadUnit<kThreeUnitNodeValueLead) {
669                ++pos;
670            } else {
671                pos+=2;
672            }
673        }
674        return pos;
675    }
676
677    private static int jumpByDelta(CharSequence chars, int pos) {
678        int delta=chars.charAt(pos++);
679        if(delta>=kMinTwoUnitDeltaLead) {
680            if(delta==kThreeUnitDeltaLead) {
681                delta=(chars.charAt(pos)<<16)|chars.charAt(pos+1);
682                pos+=2;
683            } else {
684                delta=((delta-kMinTwoUnitDeltaLead)<<16)|chars.charAt(pos++);
685            }
686        }
687        return pos+delta;
688    }
689
690    private static int skipDelta(CharSequence chars, int pos) {
691        int delta=chars.charAt(pos++);
692        if(delta>=kMinTwoUnitDeltaLead) {
693            if(delta==kThreeUnitDeltaLead) {
694                pos+=2;
695            } else {
696                ++pos;
697            }
698        }
699        return pos;
700    }
701
702    private static Result[] valueResults_={ Result.INTERMEDIATE_VALUE, Result.FINAL_VALUE };
703
704    // Handles a branch node for both next(unit) and next(string).
705    private Result branchNext(int pos, int length, int inUnit) {
706        // Branch according to the current unit.
707        if(length==0) {
708            length=chars_.charAt(pos++);
709        }
710        ++length;
711        // The length of the branch is the number of units to select from.
712        // The data structure encodes a binary search.
713        while(length>kMaxBranchLinearSubNodeLength) {
714            if(inUnit<chars_.charAt(pos++)) {
715                length>>=1;
716                pos=jumpByDelta(chars_, pos);
717            } else {
718                length=length-(length>>1);
719                pos=skipDelta(chars_, pos);
720            }
721        }
722        // Drop down to linear search for the last few units.
723        // length>=2 because the loop body above sees length>kMaxBranchLinearSubNodeLength>=3
724        // and divides length by 2.
725        do {
726            if(inUnit==chars_.charAt(pos++)) {
727                Result result;
728                int node=chars_.charAt(pos);
729                if((node&kValueIsFinal)!=0) {
730                    // Leave the final value for getValue() to read.
731                    result=Result.FINAL_VALUE;
732                } else {
733                    // Use the non-final value as the jump delta.
734                    ++pos;
735                    // int delta=readValue(pos, node);
736                    int delta;
737                    if(node<kMinTwoUnitValueLead) {
738                        delta=node;
739                    } else if(node<kThreeUnitValueLead) {
740                        delta=((node-kMinTwoUnitValueLead)<<16)|chars_.charAt(pos++);
741                    } else {
742                        delta=(chars_.charAt(pos)<<16)|chars_.charAt(pos+1);
743                        pos+=2;
744                    }
745                    // end readValue()
746                    pos+=delta;
747                    node=chars_.charAt(pos);
748                    result= node>=kMinValueLead ? valueResults_[node>>15] : Result.NO_VALUE;
749                }
750                pos_=pos;
751                return result;
752            }
753            --length;
754            pos=skipValue(chars_, pos);
755        } while(length>1);
756        if(inUnit==chars_.charAt(pos++)) {
757            pos_=pos;
758            int node=chars_.charAt(pos);
759            return node>=kMinValueLead ? valueResults_[node>>15] : Result.NO_VALUE;
760        } else {
761            stop();
762            return Result.NO_MATCH;
763        }
764    }
765
766    // Requires remainingLength_<0.
767    private Result nextImpl(int pos, int inUnit) {
768        int node=chars_.charAt(pos++);
769        for(;;) {
770            if(node<kMinLinearMatch) {
771                return branchNext(pos, node, inUnit);
772            } else if(node<kMinValueLead) {
773                // Match the first of length+1 units.
774                int length=node-kMinLinearMatch;  // Actual match length minus 1.
775                if(inUnit==chars_.charAt(pos++)) {
776                    remainingMatchLength_=--length;
777                    pos_=pos;
778                    return (length<0 && (node=chars_.charAt(pos))>=kMinValueLead) ?
779                            valueResults_[node>>15] : Result.NO_VALUE;
780                } else {
781                    // No match.
782                    break;
783                }
784            } else if((node&kValueIsFinal)!=0) {
785                // No further matching units.
786                break;
787            } else {
788                // Skip intermediate value.
789                pos=skipNodeValue(pos, node);
790                node&=kNodeTypeMask;
791            }
792        }
793        stop();
794        return Result.NO_MATCH;
795    }
796
797    // Helper functions for getUniqueValue().
798    // Recursively finds a unique value (or whether there is not a unique one)
799    // from a branch.
800    // uniqueValue: On input, same as for getUniqueValue()/findUniqueValue().
801    // On return, if not 0, then bits 63..33 contain the updated non-negative pos.
802    private static long findUniqueValueFromBranch(CharSequence chars, int pos, int length,
803                                                  long uniqueValue) {
804        while(length>kMaxBranchLinearSubNodeLength) {
805            ++pos;  // ignore the comparison unit
806            uniqueValue=findUniqueValueFromBranch(chars, jumpByDelta(chars, pos), length>>1, uniqueValue);
807            if(uniqueValue==0) {
808                return 0;
809            }
810            length=length-(length>>1);
811            pos=skipDelta(chars, pos);
812        }
813        do {
814            ++pos;  // ignore a comparison unit
815            // handle its value
816            int node=chars.charAt(pos++);
817            boolean isFinal=(node&kValueIsFinal)!=0;
818            node&=0x7fff;
819            int value=readValue(chars, pos, node);
820            pos=skipValue(pos, node);
821            if(isFinal) {
822                if(uniqueValue!=0) {
823                    if(value!=(int)(uniqueValue>>1)) {
824                        return 0;
825                    }
826                } else {
827                    uniqueValue=((long)value<<1)|1;
828                }
829            } else {
830                uniqueValue=findUniqueValue(chars, pos+value, uniqueValue);
831                if(uniqueValue==0) {
832                    return 0;
833                }
834            }
835        } while(--length>1);
836        // ignore the last comparison byte
837        return ((long)(pos+1)<<33)|(uniqueValue&0x1ffffffffL);
838    }
839    // Recursively finds a unique value (or whether there is not a unique one)
840    // starting from a position on a node lead unit.
841    // uniqueValue: If there is one, then bits 32..1 contain the value and bit 0 is set.
842    // Otherwise, uniqueValue is 0. Bits 63..33 are ignored.
843    private static long findUniqueValue(CharSequence chars, int pos, long uniqueValue) {
844        int node=chars.charAt(pos++);
845        for(;;) {
846            if(node<kMinLinearMatch) {
847                if(node==0) {
848                    node=chars.charAt(pos++);
849                }
850                uniqueValue=findUniqueValueFromBranch(chars, pos, node+1, uniqueValue);
851                if(uniqueValue==0) {
852                    return 0;
853                }
854                pos=(int)(uniqueValue>>>33);
855                node=chars.charAt(pos++);
856            } else if(node<kMinValueLead) {
857                // linear-match node
858                pos+=node-kMinLinearMatch+1;  // Ignore the match units.
859                node=chars.charAt(pos++);
860            } else {
861                boolean isFinal=(node&kValueIsFinal)!=0;
862                int value;
863                if(isFinal) {
864                    value=readValue(chars, pos, node&0x7fff);
865                } else {
866                    value=readNodeValue(chars, pos, node);
867                }
868                if(uniqueValue!=0) {
869                    if(value!=(int)(uniqueValue>>1)) {
870                        return 0;
871                    }
872                } else {
873                    uniqueValue=((long)value<<1)|1;
874                }
875                if(isFinal) {
876                    return uniqueValue;
877                }
878                pos=skipNodeValue(pos, node);
879                node&=kNodeTypeMask;
880            }
881        }
882    }
883
884    // Helper functions for getNextChars().
885    // getNextChars() when pos is on a branch node.
886    private static void getNextBranchChars(CharSequence chars, int pos, int length, Appendable out) {
887        while(length>kMaxBranchLinearSubNodeLength) {
888            ++pos;  // ignore the comparison unit
889            getNextBranchChars(chars, jumpByDelta(chars, pos), length>>1, out);
890            length=length-(length>>1);
891            pos=skipDelta(chars, pos);
892        }
893        do {
894            append(out, chars.charAt(pos++));
895            pos=skipValue(chars, pos);
896        } while(--length>1);
897        append(out, chars.charAt(pos));
898    }
899    private static void append(Appendable out, int c) {
900        try {
901            out.append((char)c);
902        } catch(IOException e) {
903            throw new ICUUncheckedIOException(e);
904        }
905    }
906
907    // CharsTrie data structure
908    //
909    // The trie consists of a series of char-serialized nodes for incremental
910    // Unicode string/char sequence matching. (char=16-bit unsigned integer)
911    // The root node is at the beginning of the trie data.
912    //
913    // Types of nodes are distinguished by their node lead unit ranges.
914    // After each node, except a final-value node, another node follows to
915    // encode match values or continue matching further units.
916    //
917    // Node types:
918    //  - Final-value node: Stores a 32-bit integer in a compact, variable-length format.
919    //    The value is for the string/char sequence so far.
920    //  - Match node, optionally with an intermediate value in a different compact format.
921    //    The value, if present, is for the string/char sequence so far.
922    //
923    //  Aside from the value, which uses the node lead unit's high bits:
924    //
925    //  - Linear-match node: Matches a number of units.
926    //  - Branch node: Branches to other nodes according to the current input unit.
927    //    The node unit is the length of the branch (number of units to select from)
928    //    minus 1. It is followed by a sub-node:
929    //    - If the length is at most kMaxBranchLinearSubNodeLength, then
930    //      there are length-1 (key, value) pairs and then one more comparison unit.
931    //      If one of the key units matches, then the value is either a final value for
932    //      the string so far, or a "jump" delta to the next node.
933    //      If the last unit matches, then matching continues with the next node.
934    //      (Values have the same encoding as final-value nodes.)
935    //    - If the length is greater than kMaxBranchLinearSubNodeLength, then
936    //      there is one unit and one "jump" delta.
937    //      If the input unit is less than the sub-node unit, then "jump" by delta to
938    //      the next sub-node which will have a length of length/2.
939    //      (The delta has its own compact encoding.)
940    //      Otherwise, skip the "jump" delta to the next sub-node
941    //      which will have a length of length-length/2.
942
943    // Match-node lead unit values, after masking off intermediate-value bits:
944
945    // 0000..002f: Branch node. If node!=0 then the length is node+1, otherwise
946    // the length is one more than the next unit.
947
948    // For a branch sub-node with at most this many entries, we drop down
949    // to a linear search.
950    /*package*/ static final int kMaxBranchLinearSubNodeLength=5;
951
952    // 0030..003f: Linear-match node, match 1..16 units and continue reading the next node.
953    /*package*/ static final int kMinLinearMatch=0x30;
954    /*package*/ static final int kMaxLinearMatchLength=0x10;
955
956    // Match-node lead unit bits 14..6 for the optional intermediate value.
957    // If these bits are 0, then there is no intermediate value.
958    // Otherwise, see the *NodeValue* constants below.
959    /*package*/ static final int kMinValueLead=kMinLinearMatch+kMaxLinearMatchLength;  // 0x0040
960    /*package*/ static final int kNodeTypeMask=kMinValueLead-1;  // 0x003f
961
962    // A final-value node has bit 15 set.
963    /*package*/ static final int kValueIsFinal=0x8000;
964
965    // Compact value: After testing and masking off bit 15, use the following thresholds.
966    /*package*/ static final int kMaxOneUnitValue=0x3fff;
967
968    /*package*/ static final int kMinTwoUnitValueLead=kMaxOneUnitValue+1;  // 0x4000
969    /*package*/ static final int kThreeUnitValueLead=0x7fff;
970
971    /*package*/ static final int kMaxTwoUnitValue=((kThreeUnitValueLead-kMinTwoUnitValueLead)<<16)-1;  // 0x3ffeffff
972
973    // Compact intermediate-value integer, lead unit shared with a branch or linear-match node.
974    /*package*/ static final int kMaxOneUnitNodeValue=0xff;
975    /*package*/ static final int kMinTwoUnitNodeValueLead=kMinValueLead+((kMaxOneUnitNodeValue+1)<<6);  // 0x4040
976    /*package*/ static final int kThreeUnitNodeValueLead=0x7fc0;
977
978    /*package*/ static final int kMaxTwoUnitNodeValue=
979        ((kThreeUnitNodeValueLead-kMinTwoUnitNodeValueLead)<<10)-1;  // 0xfdffff
980
981    // Compact delta integers.
982    /*package*/ static final int kMaxOneUnitDelta=0xfbff;
983    /*package*/ static final int kMinTwoUnitDeltaLead=kMaxOneUnitDelta+1;  // 0xfc00
984    /*package*/ static final int kThreeUnitDeltaLead=0xffff;
985
986    /*package*/ static final int kMaxTwoUnitDelta=((kThreeUnitDeltaLead-kMinTwoUnitDeltaLead)<<16)-1;  // 0x03feffff
987
988    // Fixed value referencing the CharsTrie words.
989    private CharSequence chars_;
990    private int root_;
991
992    // Iterator variables.
993
994    // Pointer to next trie unit to read. NULL if no more matches.
995    private int pos_;
996    // Remaining length of a linear-match node, minus 1. Negative if not in such a node.
997    private int remainingMatchLength_;
998}
999