1// © 2016 and later: Unicode, Inc. and others.
2// License & terms of use: http://www.unicode.org/copyright.html#License
3/*
4*******************************************************************************
5*   Copyright (C) 2011-2014, International Business Machines
6*   Corporation and others.  All Rights Reserved.
7*******************************************************************************
8*   created on: 2011jan06
9*   created by: Markus W. Scherer
10*   ported from ICU4C ucharstrie.h/.cpp
11*/
12
13package com.ibm.icu.util;
14
15import java.io.IOException;
16import java.util.ArrayList;
17import java.util.NoSuchElementException;
18
19import com.ibm.icu.text.UTF16;
20import com.ibm.icu.util.BytesTrie.Result;
21
22/**
23 * Light-weight, non-const reader class for a CharsTrie.
24 * Traverses a char-serialized data structure with minimal state,
25 * for mapping strings (16-bit-unit sequences) to non-negative integer values.
26 *
27 * <p>This class is not intended for public subclassing.
28 *
29 * @stable ICU 4.8
30 * @author Markus W. Scherer
31 */
32public final class CharsTrie implements Cloneable, Iterable<CharsTrie.Entry> {
33    /**
34     * Constructs a CharsTrie reader instance.
35     *
36     * <p>The CharSequence must contain a copy of a char sequence from the CharsTrieBuilder,
37     * with the offset indicating the first char of that sequence.
38     * The CharsTrie object will not read more chars than
39     * the CharsTrieBuilder generated in the corresponding build() call.
40     *
41     * <p>The CharSequence is not copied/cloned and must not be modified while
42     * the CharsTrie object is in use.
43     *
44     * @param trieChars CharSequence that contains the serialized trie.
45     * @param offset Root offset of the trie in the CharSequence.
46     * @stable ICU 4.8
47     */
48    public CharsTrie(CharSequence trieChars, int offset) {
49        chars_=trieChars;
50        pos_=root_=offset;
51        remainingMatchLength_=-1;
52    }
53
54    /**
55     * Clones this trie reader object and its state,
56     * but not the char array which will be shared.
57     * @return A shallow clone of this trie.
58     * @stable ICU 4.8
59     */
60    @Override
61    public Object clone() throws CloneNotSupportedException {
62        return super.clone();  // A shallow copy is just what we need.
63    }
64
65    /**
66     * Resets this trie to its initial state.
67     * @return this
68     * @stable ICU 4.8
69     */
70    public CharsTrie reset() {
71        pos_=root_;
72        remainingMatchLength_=-1;
73        return this;
74    }
75
76    /**
77     * CharsTrie state object, for saving a trie's current state
78     * and resetting the trie back to this state later.
79     * @stable ICU 4.8
80     */
81    public static final class State {
82        /**
83         * Constructs an empty State.
84         * @stable ICU 4.8
85         */
86        public State() {}
87        private CharSequence chars;
88        private int root;
89        private int pos;
90        private int remainingMatchLength;
91    }
92
93    /**
94     * Saves the state of this trie.
95     * @param state The State object to hold the trie's state.
96     * @return this
97     * @see #resetToState
98     * @stable ICU 4.8
99     */
100    public CharsTrie saveState(State state) /*const*/ {
101        state.chars=chars_;
102        state.root=root_;
103        state.pos=pos_;
104        state.remainingMatchLength=remainingMatchLength_;
105        return this;
106    }
107
108    /**
109     * Resets this trie to the saved state.
110     * @param state The State object which holds a saved trie state.
111     * @return this
112     * @throws IllegalArgumentException if the state object contains no state,
113     *         or the state of a different trie
114     * @see #saveState
115     * @see #reset
116     * @stable ICU 4.8
117     */
118    public CharsTrie resetToState(State state) {
119        if(chars_==state.chars && chars_!=null && root_==state.root) {
120            pos_=state.pos;
121            remainingMatchLength_=state.remainingMatchLength;
122        } else {
123            throw new IllegalArgumentException("incompatible trie state");
124        }
125        return this;
126    }
127
128    /**
129     * Determines whether the string so far matches, whether it has a value,
130     * and whether another input char can continue a matching string.
131     * @return The match/value Result.
132     * @stable ICU 4.8
133     */
134    public Result current() /*const*/ {
135        int pos=pos_;
136        if(pos<0) {
137            return Result.NO_MATCH;
138        } else {
139            int node;
140            return (remainingMatchLength_<0 && (node=chars_.charAt(pos))>=kMinValueLead) ?
141                    valueResults_[node>>15] : Result.NO_VALUE;
142        }
143    }
144
145    /**
146     * Traverses the trie from the initial state for this input char.
147     * Equivalent to reset().next(inUnit).
148     * @param inUnit Input char value. Values below 0 and above 0xffff will never match.
149     * @return The match/value Result.
150     * @stable ICU 4.8
151     */
152    public Result first(int inUnit) {
153        remainingMatchLength_=-1;
154        return nextImpl(root_, inUnit);
155    }
156
157    /**
158     * Traverses the trie from the initial state for the
159     * one or two UTF-16 code units for this input code point.
160     * Equivalent to reset().nextForCodePoint(cp).
161     * @param cp A Unicode code point 0..0x10ffff.
162     * @return The match/value Result.
163     * @stable ICU 4.8
164     */
165    public Result firstForCodePoint(int cp) {
166        return cp<=0xffff ?
167            first(cp) :
168            (first(UTF16.getLeadSurrogate(cp)).hasNext() ?
169                next(UTF16.getTrailSurrogate(cp)) :
170                Result.NO_MATCH);
171    }
172
173    /**
174     * Traverses the trie from the current state for this input char.
175     * @param inUnit Input char value. Values below 0 and above 0xffff will never match.
176     * @return The match/value Result.
177     * @stable ICU 4.8
178     */
179    public Result next(int inUnit) {
180        int pos=pos_;
181        if(pos<0) {
182            return Result.NO_MATCH;
183        }
184        int length=remainingMatchLength_;  // Actual remaining match length minus 1.
185        if(length>=0) {
186            // Remaining part of a linear-match node.
187            if(inUnit==chars_.charAt(pos++)) {
188                remainingMatchLength_=--length;
189                pos_=pos;
190                int node;
191                return (length<0 && (node=chars_.charAt(pos))>=kMinValueLead) ?
192                        valueResults_[node>>15] : Result.NO_VALUE;
193            } else {
194                stop();
195                return Result.NO_MATCH;
196            }
197        }
198        return nextImpl(pos, inUnit);
199    }
200
201    /**
202     * Traverses the trie from the current state for the
203     * one or two UTF-16 code units for this input code point.
204     * @param cp A Unicode code point 0..0x10ffff.
205     * @return The match/value Result.
206     * @stable ICU 4.8
207     */
208    public Result nextForCodePoint(int cp) {
209        return cp<=0xffff ?
210            next(cp) :
211            (next(UTF16.getLeadSurrogate(cp)).hasNext() ?
212                next(UTF16.getTrailSurrogate(cp)) :
213                Result.NO_MATCH);
214    }
215
216    /**
217     * Traverses the trie from the current state for this string.
218     * Equivalent to
219     * <pre>
220     * Result result=current();
221     * for(each c in s)
222     *   if(!result.hasNext()) return Result.NO_MATCH;
223     *   result=next(c);
224     * return result;
225     * </pre>
226     * @param s Contains a string.
227     * @param sIndex The start index of the string in s.
228     * @param sLimit The (exclusive) end index of the string in s.
229     * @return The match/value Result.
230     * @stable ICU 4.8
231     */
232    public Result next(CharSequence s, int sIndex, int sLimit) {
233        if(sIndex>=sLimit) {
234            // Empty input.
235            return current();
236        }
237        int pos=pos_;
238        if(pos<0) {
239            return Result.NO_MATCH;
240        }
241        int length=remainingMatchLength_;  // Actual remaining match length minus 1.
242        for(;;) {
243            // Fetch the next input unit, if there is one.
244            // Continue a linear-match node.
245            char inUnit;
246            for(;;) {
247                if(sIndex==sLimit) {
248                    remainingMatchLength_=length;
249                    pos_=pos;
250                    int node;
251                    return (length<0 && (node=chars_.charAt(pos))>=kMinValueLead) ?
252                            valueResults_[node>>15] : Result.NO_VALUE;
253                }
254                inUnit=s.charAt(sIndex++);
255                if(length<0) {
256                    remainingMatchLength_=length;
257                    break;
258                }
259                if(inUnit!=chars_.charAt(pos)) {
260                    stop();
261                    return Result.NO_MATCH;
262                }
263                ++pos;
264                --length;
265            }
266            int node=chars_.charAt(pos++);
267            for(;;) {
268                if(node<kMinLinearMatch) {
269                    Result result=branchNext(pos, node, inUnit);
270                    if(result==Result.NO_MATCH) {
271                        return Result.NO_MATCH;
272                    }
273                    // Fetch the next input unit, if there is one.
274                    if(sIndex==sLimit) {
275                        return result;
276                    }
277                    if(result==Result.FINAL_VALUE) {
278                        // No further matching units.
279                        stop();
280                        return Result.NO_MATCH;
281                    }
282                    inUnit=s.charAt(sIndex++);
283                    pos=pos_;  // branchNext() advanced pos and wrote it to pos_ .
284                    node=chars_.charAt(pos++);
285                } else if(node<kMinValueLead) {
286                    // Match length+1 units.
287                    length=node-kMinLinearMatch;  // Actual match length minus 1.
288                    if(inUnit!=chars_.charAt(pos)) {
289                        stop();
290                        return Result.NO_MATCH;
291                    }
292                    ++pos;
293                    --length;
294                    break;
295                } else if((node&kValueIsFinal)!=0) {
296                    // No further matching units.
297                    stop();
298                    return Result.NO_MATCH;
299                } else {
300                    // Skip intermediate value.
301                    pos=skipNodeValue(pos, node);
302                    node&=kNodeTypeMask;
303                }
304            }
305        }
306    }
307
308    /**
309     * Returns a matching string's value if called immediately after
310     * current()/first()/next() returned Result.INTERMEDIATE_VALUE or Result.FINAL_VALUE.
311     * getValue() can be called multiple times.
312     *
313     * Do not call getValue() after Result.NO_MATCH or Result.NO_VALUE!
314     * @return The value for the string so far.
315     * @stable ICU 4.8
316     */
317    public int getValue() /*const*/ {
318        int pos=pos_;
319        int leadUnit=chars_.charAt(pos++);
320        assert(leadUnit>=kMinValueLead);
321        return (leadUnit&kValueIsFinal)!=0 ?
322            readValue(chars_, pos, leadUnit&0x7fff) : readNodeValue(chars_, pos, leadUnit);
323    }
324
325    /**
326     * Determines whether all strings reachable from the current state
327     * map to the same value, and if so, returns that value.
328     * @return The unique value in bits 32..1 with bit 0 set,
329     *         if all strings reachable from the current state
330     *         map to the same value; otherwise returns 0.
331     * @stable ICU 4.8
332     */
333    public long getUniqueValue() /*const*/ {
334        int pos=pos_;
335        if(pos<0) {
336            return 0;
337        }
338        // Skip the rest of a pending linear-match node.
339        long uniqueValue=findUniqueValue(chars_, pos+remainingMatchLength_+1, 0);
340        // Ignore internally used bits 63..33; extend the actual value's sign bit from bit 32.
341        return (uniqueValue<<31)>>31;
342    }
343
344    /**
345     * Finds each char which continues the string from the current state.
346     * That is, each char c for which it would be next(c)!=Result.NO_MATCH now.
347     * @param out Each next char is appended to this object.
348     *            (Only uses the out.append(c) method.)
349     * @return The number of chars which continue the string from here.
350     * @stable ICU 4.8
351     */
352    public int getNextChars(Appendable out) /*const*/ {
353        int pos=pos_;
354        if(pos<0) {
355            return 0;
356        }
357        if(remainingMatchLength_>=0) {
358            append(out, chars_.charAt(pos));  // Next unit of a pending linear-match node.
359            return 1;
360        }
361        int node=chars_.charAt(pos++);
362        if(node>=kMinValueLead) {
363            if((node&kValueIsFinal)!=0) {
364                return 0;
365            } else {
366                pos=skipNodeValue(pos, node);
367                node&=kNodeTypeMask;
368            }
369        }
370        if(node<kMinLinearMatch) {
371            if(node==0) {
372                node=chars_.charAt(pos++);
373            }
374            getNextBranchChars(chars_, pos, ++node, out);
375            return node;
376        } else {
377            // First unit of the linear-match node.
378            append(out, chars_.charAt(pos));
379            return 1;
380        }
381    }
382
383    /**
384     * Iterates from the current state of this trie.
385     * @return A new CharsTrie.Iterator.
386     * @stable ICU 4.8
387     */
388    @Override
389    public Iterator iterator() {
390        return new Iterator(chars_, pos_, remainingMatchLength_, 0);
391    }
392
393    /**
394     * Iterates from the current state of this trie.
395     * @param maxStringLength If 0, the iterator returns full strings.
396     *                        Otherwise, the iterator returns strings with this maximum length.
397     * @return A new CharsTrie.Iterator.
398     * @stable ICU 4.8
399     */
400    public Iterator iterator(int maxStringLength) {
401        return new Iterator(chars_, pos_, remainingMatchLength_, maxStringLength);
402    }
403
404    /**
405     * Iterates from the root of a char-serialized BytesTrie.
406     * @param trieChars CharSequence that contains the serialized trie.
407     * @param offset Root offset of the trie in the CharSequence.
408     * @param maxStringLength If 0, the iterator returns full strings.
409     *                        Otherwise, the iterator returns strings with this maximum length.
410     * @return A new CharsTrie.Iterator.
411     * @stable ICU 4.8
412     */
413    public static Iterator iterator(CharSequence trieChars, int offset, int maxStringLength) {
414        return new Iterator(trieChars, offset, -1, maxStringLength);
415    }
416
417    /**
418     * Return value type for the Iterator.
419     * @stable ICU 4.8
420     */
421    public static final class Entry {
422        /**
423         * The string.
424         * @stable ICU 4.8
425         */
426        public CharSequence chars;
427        /**
428         * The value associated with the string.
429         * @stable ICU 4.8
430         */
431        public int value;
432
433        private Entry() {
434        }
435    }
436
437    /**
438     * Iterator for all of the (string, value) pairs in a CharsTrie.
439     * @stable ICU 4.8
440     */
441    public static final class Iterator implements java.util.Iterator<Entry> {
442        private Iterator(CharSequence trieChars, int offset, int remainingMatchLength, int maxStringLength) {
443            chars_=trieChars;
444            pos_=initialPos_=offset;
445            remainingMatchLength_=initialRemainingMatchLength_=remainingMatchLength;
446            maxLength_=maxStringLength;
447            int length=remainingMatchLength_;  // Actual remaining match length minus 1.
448            if(length>=0) {
449                // Pending linear-match node, append remaining bytes to str_.
450                ++length;
451                if(maxLength_>0 && length>maxLength_) {
452                    length=maxLength_;  // This will leave remainingMatchLength>=0 as a signal.
453                }
454                str_.append(chars_, pos_, pos_+length);
455                pos_+=length;
456                remainingMatchLength_-=length;
457            }
458        }
459
460        /**
461         * Resets this iterator to its initial state.
462         * @return this
463         * @stable ICU 4.8
464         */
465        public Iterator reset() {
466            pos_=initialPos_;
467            remainingMatchLength_=initialRemainingMatchLength_;
468            skipValue_=false;
469            int length=remainingMatchLength_+1;  // Remaining match length.
470            if(maxLength_>0 && length>maxLength_) {
471                length=maxLength_;
472            }
473            str_.setLength(length);
474            pos_+=length;
475            remainingMatchLength_-=length;
476            stack_.clear();
477            return this;
478        }
479
480        /**
481         * @return true if there are more elements.
482         * @stable ICU 4.8
483         */
484        @Override
485        public boolean hasNext() /*const*/ { return pos_>=0 || !stack_.isEmpty(); }
486
487        /**
488         * Finds the next (string, value) pair if there is one.
489         *
490         * If the string is truncated to the maximum length and does not
491         * have a real value, then the value is set to -1.
492         * In this case, this "not a real value" is indistinguishable from
493         * a real value of -1.
494         * @return An Entry with the string and value of the next element.
495         * @throws NoSuchElementException - iteration has no more elements.
496         * @stable ICU 4.8
497         */
498        @Override
499        public Entry next() {
500            int pos=pos_;
501            if(pos<0) {
502                if(stack_.isEmpty()) {
503                    throw new NoSuchElementException();
504                }
505                // Pop the state off the stack and continue with the next outbound edge of
506                // the branch node.
507                long top=stack_.remove(stack_.size()-1);
508                int length=(int)top;
509                pos=(int)(top>>32);
510                str_.setLength(length&0xffff);
511                length>>>=16;
512                if(length>1) {
513                    pos=branchNext(pos, length);
514                    if(pos<0) {
515                        return entry_;  // Reached a final value.
516                    }
517                } else {
518                    str_.append(chars_.charAt(pos++));
519                }
520            }
521            if(remainingMatchLength_>=0) {
522                // We only get here if we started in a pending linear-match node
523                // with more than maxLength remaining units.
524                return truncateAndStop();
525            }
526            for(;;) {
527                int node=chars_.charAt(pos++);
528                if(node>=kMinValueLead) {
529                    if(skipValue_) {
530                        pos=skipNodeValue(pos, node);
531                        node&=kNodeTypeMask;
532                        skipValue_=false;
533                    } else {
534                        // Deliver value for the string so far.
535                        boolean isFinal=(node&kValueIsFinal)!=0;
536                        if(isFinal) {
537                            entry_.value=readValue(chars_, pos, node&0x7fff);
538                        } else {
539                            entry_.value=readNodeValue(chars_, pos, node);
540                        }
541                        if(isFinal || (maxLength_>0 && str_.length()==maxLength_)) {
542                            pos_=-1;
543                        } else {
544                            // We cannot skip the value right here because it shares its
545                            // lead unit with a match node which we have to evaluate
546                            // next time.
547                            // Instead, keep pos_ on the node lead unit itself.
548                            pos_=pos-1;
549                            skipValue_=true;
550                        }
551                        entry_.chars=str_;
552                        return entry_;
553                    }
554                }
555                if(maxLength_>0 && str_.length()==maxLength_) {
556                    return truncateAndStop();
557                }
558                if(node<kMinLinearMatch) {
559                    if(node==0) {
560                        node=chars_.charAt(pos++);
561                    }
562                    pos=branchNext(pos, node+1);
563                    if(pos<0) {
564                        return entry_;  // Reached a final value.
565                    }
566                } else {
567                    // Linear-match node, append length units to str_.
568                    int length=node-kMinLinearMatch+1;
569                    if(maxLength_>0 && str_.length()+length>maxLength_) {
570                        str_.append(chars_, pos, pos+maxLength_-str_.length());
571                        return truncateAndStop();
572                    }
573                    str_.append(chars_, pos, pos+length);
574                    pos+=length;
575                }
576            }
577        }
578
579        /**
580         * Iterator.remove() is not supported.
581         * @throws UnsupportedOperationException (always)
582         * @stable ICU 4.8
583         */
584        @Override
585        public void remove() {
586            throw new UnsupportedOperationException();
587        }
588
589        private Entry truncateAndStop() {
590            pos_=-1;
591            // We reset entry_.chars every time we return entry_
592            // just because the caller might have modified the Entry.
593            entry_.chars=str_;
594            entry_.value=-1;  // no real value for str
595            return entry_;
596        }
597
598        private int branchNext(int pos, int length) {
599            while(length>kMaxBranchLinearSubNodeLength) {
600                ++pos;  // ignore the comparison unit
601                // Push state for the greater-or-equal edge.
602                stack_.add(((long)skipDelta(chars_, pos)<<32)|((length-(length>>1))<<16)|str_.length());
603                // Follow the less-than edge.
604                length>>=1;
605                pos=jumpByDelta(chars_, pos);
606            }
607            // List of key-value pairs where values are either final values or jump deltas.
608            // Read the first (key, value) pair.
609            char trieUnit=chars_.charAt(pos++);
610            int node=chars_.charAt(pos++);
611            boolean isFinal=(node&kValueIsFinal)!=0;
612            int value=readValue(chars_, pos, node&=0x7fff);
613            pos=skipValue(pos, node);
614            stack_.add(((long)pos<<32)|((length-1)<<16)|str_.length());
615            str_.append(trieUnit);
616            if(isFinal) {
617                pos_=-1;
618                entry_.chars=str_;
619                entry_.value=value;
620                return -1;
621            } else {
622                return pos+value;
623            }
624        }
625
626        private CharSequence chars_;
627        private int pos_;
628        private int initialPos_;
629        private int remainingMatchLength_;
630        private int initialRemainingMatchLength_;
631        private boolean skipValue_;  // Skip intermediate value which was already delivered.
632
633        private StringBuilder str_=new StringBuilder();
634        private int maxLength_;
635        private Entry entry_=new Entry();
636
637        // The stack stores longs for backtracking to another
638        // outbound edge of a branch node.
639        // Each long has the offset in chars_ in bits 62..32,
640        // the str_.length() from before the node in bits 15..0,
641        // and the remaining branch length in bits 31..16.
642        // (We could store the remaining branch length minus 1 in bits 30..16 and not use bit 31,
643        // but the code looks more confusing that way.)
644        private ArrayList<Long> stack_=new ArrayList<Long>();
645    }
646
647    private void stop() {
648        pos_=-1;
649    }
650
651    // Reads a compact 32-bit integer.
652    // pos is already after the leadUnit, and the lead unit has bit 15 reset.
653    private static int readValue(CharSequence chars, int pos, int leadUnit) {
654        int value;
655        if(leadUnit<kMinTwoUnitValueLead) {
656            value=leadUnit;
657        } else if(leadUnit<kThreeUnitValueLead) {
658            value=((leadUnit-kMinTwoUnitValueLead)<<16)|chars.charAt(pos);
659        } else {
660            value=(chars.charAt(pos)<<16)|chars.charAt(pos+1);
661        }
662        return value;
663    }
664    private static int skipValue(int pos, int leadUnit) {
665        if(leadUnit>=kMinTwoUnitValueLead) {
666            if(leadUnit<kThreeUnitValueLead) {
667                ++pos;
668            } else {
669                pos+=2;
670            }
671        }
672        return pos;
673    }
674    private static int skipValue(CharSequence chars, int pos) {
675        int leadUnit=chars.charAt(pos++);
676        return skipValue(pos, leadUnit&0x7fff);
677    }
678
679    private static int readNodeValue(CharSequence chars, int pos, int leadUnit) {
680        assert(kMinValueLead<=leadUnit && leadUnit<kValueIsFinal);
681        int value;
682        if(leadUnit<kMinTwoUnitNodeValueLead) {
683            value=(leadUnit>>6)-1;
684        } else if(leadUnit<kThreeUnitNodeValueLead) {
685            value=(((leadUnit&0x7fc0)-kMinTwoUnitNodeValueLead)<<10)|chars.charAt(pos);
686        } else {
687            value=(chars.charAt(pos)<<16)|chars.charAt(pos+1);
688        }
689        return value;
690    }
691    private static int skipNodeValue(int pos, int leadUnit) {
692        assert(kMinValueLead<=leadUnit && leadUnit<kValueIsFinal);
693        if(leadUnit>=kMinTwoUnitNodeValueLead) {
694            if(leadUnit<kThreeUnitNodeValueLead) {
695                ++pos;
696            } else {
697                pos+=2;
698            }
699        }
700        return pos;
701    }
702
703    private static int jumpByDelta(CharSequence chars, int pos) {
704        int delta=chars.charAt(pos++);
705        if(delta>=kMinTwoUnitDeltaLead) {
706            if(delta==kThreeUnitDeltaLead) {
707                delta=(chars.charAt(pos)<<16)|chars.charAt(pos+1);
708                pos+=2;
709            } else {
710                delta=((delta-kMinTwoUnitDeltaLead)<<16)|chars.charAt(pos++);
711            }
712        }
713        return pos+delta;
714    }
715
716    private static int skipDelta(CharSequence chars, int pos) {
717        int delta=chars.charAt(pos++);
718        if(delta>=kMinTwoUnitDeltaLead) {
719            if(delta==kThreeUnitDeltaLead) {
720                pos+=2;
721            } else {
722                ++pos;
723            }
724        }
725        return pos;
726    }
727
728    private static Result[] valueResults_={ Result.INTERMEDIATE_VALUE, Result.FINAL_VALUE };
729
730    // Handles a branch node for both next(unit) and next(string).
731    private Result branchNext(int pos, int length, int inUnit) {
732        // Branch according to the current unit.
733        if(length==0) {
734            length=chars_.charAt(pos++);
735        }
736        ++length;
737        // The length of the branch is the number of units to select from.
738        // The data structure encodes a binary search.
739        while(length>kMaxBranchLinearSubNodeLength) {
740            if(inUnit<chars_.charAt(pos++)) {
741                length>>=1;
742                pos=jumpByDelta(chars_, pos);
743            } else {
744                length=length-(length>>1);
745                pos=skipDelta(chars_, pos);
746            }
747        }
748        // Drop down to linear search for the last few units.
749        // length>=2 because the loop body above sees length>kMaxBranchLinearSubNodeLength>=3
750        // and divides length by 2.
751        do {
752            if(inUnit==chars_.charAt(pos++)) {
753                Result result;
754                int node=chars_.charAt(pos);
755                if((node&kValueIsFinal)!=0) {
756                    // Leave the final value for getValue() to read.
757                    result=Result.FINAL_VALUE;
758                } else {
759                    // Use the non-final value as the jump delta.
760                    ++pos;
761                    // int delta=readValue(pos, node);
762                    int delta;
763                    if(node<kMinTwoUnitValueLead) {
764                        delta=node;
765                    } else if(node<kThreeUnitValueLead) {
766                        delta=((node-kMinTwoUnitValueLead)<<16)|chars_.charAt(pos++);
767                    } else {
768                        delta=(chars_.charAt(pos)<<16)|chars_.charAt(pos+1);
769                        pos+=2;
770                    }
771                    // end readValue()
772                    pos+=delta;
773                    node=chars_.charAt(pos);
774                    result= node>=kMinValueLead ? valueResults_[node>>15] : Result.NO_VALUE;
775                }
776                pos_=pos;
777                return result;
778            }
779            --length;
780            pos=skipValue(chars_, pos);
781        } while(length>1);
782        if(inUnit==chars_.charAt(pos++)) {
783            pos_=pos;
784            int node=chars_.charAt(pos);
785            return node>=kMinValueLead ? valueResults_[node>>15] : Result.NO_VALUE;
786        } else {
787            stop();
788            return Result.NO_MATCH;
789        }
790    }
791
792    // Requires remainingLength_<0.
793    private Result nextImpl(int pos, int inUnit) {
794        int node=chars_.charAt(pos++);
795        for(;;) {
796            if(node<kMinLinearMatch) {
797                return branchNext(pos, node, inUnit);
798            } else if(node<kMinValueLead) {
799                // Match the first of length+1 units.
800                int length=node-kMinLinearMatch;  // Actual match length minus 1.
801                if(inUnit==chars_.charAt(pos++)) {
802                    remainingMatchLength_=--length;
803                    pos_=pos;
804                    return (length<0 && (node=chars_.charAt(pos))>=kMinValueLead) ?
805                            valueResults_[node>>15] : Result.NO_VALUE;
806                } else {
807                    // No match.
808                    break;
809                }
810            } else if((node&kValueIsFinal)!=0) {
811                // No further matching units.
812                break;
813            } else {
814                // Skip intermediate value.
815                pos=skipNodeValue(pos, node);
816                node&=kNodeTypeMask;
817            }
818        }
819        stop();
820        return Result.NO_MATCH;
821    }
822
823    // Helper functions for getUniqueValue().
824    // Recursively finds a unique value (or whether there is not a unique one)
825    // from a branch.
826    // uniqueValue: On input, same as for getUniqueValue()/findUniqueValue().
827    // On return, if not 0, then bits 63..33 contain the updated non-negative pos.
828    private static long findUniqueValueFromBranch(CharSequence chars, int pos, int length,
829                                                  long uniqueValue) {
830        while(length>kMaxBranchLinearSubNodeLength) {
831            ++pos;  // ignore the comparison unit
832            uniqueValue=findUniqueValueFromBranch(chars, jumpByDelta(chars, pos), length>>1, uniqueValue);
833            if(uniqueValue==0) {
834                return 0;
835            }
836            length=length-(length>>1);
837            pos=skipDelta(chars, pos);
838        }
839        do {
840            ++pos;  // ignore a comparison unit
841            // handle its value
842            int node=chars.charAt(pos++);
843            boolean isFinal=(node&kValueIsFinal)!=0;
844            node&=0x7fff;
845            int value=readValue(chars, pos, node);
846            pos=skipValue(pos, node);
847            if(isFinal) {
848                if(uniqueValue!=0) {
849                    if(value!=(int)(uniqueValue>>1)) {
850                        return 0;
851                    }
852                } else {
853                    uniqueValue=((long)value<<1)|1;
854                }
855            } else {
856                uniqueValue=findUniqueValue(chars, pos+value, uniqueValue);
857                if(uniqueValue==0) {
858                    return 0;
859                }
860            }
861        } while(--length>1);
862        // ignore the last comparison byte
863        return ((long)(pos+1)<<33)|(uniqueValue&0x1ffffffffL);
864    }
865    // Recursively finds a unique value (or whether there is not a unique one)
866    // starting from a position on a node lead unit.
867    // uniqueValue: If there is one, then bits 32..1 contain the value and bit 0 is set.
868    // Otherwise, uniqueValue is 0. Bits 63..33 are ignored.
869    private static long findUniqueValue(CharSequence chars, int pos, long uniqueValue) {
870        int node=chars.charAt(pos++);
871        for(;;) {
872            if(node<kMinLinearMatch) {
873                if(node==0) {
874                    node=chars.charAt(pos++);
875                }
876                uniqueValue=findUniqueValueFromBranch(chars, pos, node+1, uniqueValue);
877                if(uniqueValue==0) {
878                    return 0;
879                }
880                pos=(int)(uniqueValue>>>33);
881                node=chars.charAt(pos++);
882            } else if(node<kMinValueLead) {
883                // linear-match node
884                pos+=node-kMinLinearMatch+1;  // Ignore the match units.
885                node=chars.charAt(pos++);
886            } else {
887                boolean isFinal=(node&kValueIsFinal)!=0;
888                int value;
889                if(isFinal) {
890                    value=readValue(chars, pos, node&0x7fff);
891                } else {
892                    value=readNodeValue(chars, pos, node);
893                }
894                if(uniqueValue!=0) {
895                    if(value!=(int)(uniqueValue>>1)) {
896                        return 0;
897                    }
898                } else {
899                    uniqueValue=((long)value<<1)|1;
900                }
901                if(isFinal) {
902                    return uniqueValue;
903                }
904                pos=skipNodeValue(pos, node);
905                node&=kNodeTypeMask;
906            }
907        }
908    }
909
910    // Helper functions for getNextChars().
911    // getNextChars() when pos is on a branch node.
912    private static void getNextBranchChars(CharSequence chars, int pos, int length, Appendable out) {
913        while(length>kMaxBranchLinearSubNodeLength) {
914            ++pos;  // ignore the comparison unit
915            getNextBranchChars(chars, jumpByDelta(chars, pos), length>>1, out);
916            length=length-(length>>1);
917            pos=skipDelta(chars, pos);
918        }
919        do {
920            append(out, chars.charAt(pos++));
921            pos=skipValue(chars, pos);
922        } while(--length>1);
923        append(out, chars.charAt(pos));
924    }
925    private static void append(Appendable out, int c) {
926        try {
927            out.append((char)c);
928        } catch(IOException e) {
929            throw new ICUUncheckedIOException(e);
930        }
931    }
932
933    // CharsTrie data structure
934    //
935    // The trie consists of a series of char-serialized nodes for incremental
936    // Unicode string/char sequence matching. (char=16-bit unsigned integer)
937    // The root node is at the beginning of the trie data.
938    //
939    // Types of nodes are distinguished by their node lead unit ranges.
940    // After each node, except a final-value node, another node follows to
941    // encode match values or continue matching further units.
942    //
943    // Node types:
944    //  - Final-value node: Stores a 32-bit integer in a compact, variable-length format.
945    //    The value is for the string/char sequence so far.
946    //  - Match node, optionally with an intermediate value in a different compact format.
947    //    The value, if present, is for the string/char sequence so far.
948    //
949    //  Aside from the value, which uses the node lead unit's high bits:
950    //
951    //  - Linear-match node: Matches a number of units.
952    //  - Branch node: Branches to other nodes according to the current input unit.
953    //    The node unit is the length of the branch (number of units to select from)
954    //    minus 1. It is followed by a sub-node:
955    //    - If the length is at most kMaxBranchLinearSubNodeLength, then
956    //      there are length-1 (key, value) pairs and then one more comparison unit.
957    //      If one of the key units matches, then the value is either a final value for
958    //      the string so far, or a "jump" delta to the next node.
959    //      If the last unit matches, then matching continues with the next node.
960    //      (Values have the same encoding as final-value nodes.)
961    //    - If the length is greater than kMaxBranchLinearSubNodeLength, then
962    //      there is one unit and one "jump" delta.
963    //      If the input unit is less than the sub-node unit, then "jump" by delta to
964    //      the next sub-node which will have a length of length/2.
965    //      (The delta has its own compact encoding.)
966    //      Otherwise, skip the "jump" delta to the next sub-node
967    //      which will have a length of length-length/2.
968
969    // Match-node lead unit values, after masking off intermediate-value bits:
970
971    // 0000..002f: Branch node. If node!=0 then the length is node+1, otherwise
972    // the length is one more than the next unit.
973
974    // For a branch sub-node with at most this many entries, we drop down
975    // to a linear search.
976    /*package*/ static final int kMaxBranchLinearSubNodeLength=5;
977
978    // 0030..003f: Linear-match node, match 1..16 units and continue reading the next node.
979    /*package*/ static final int kMinLinearMatch=0x30;
980    /*package*/ static final int kMaxLinearMatchLength=0x10;
981
982    // Match-node lead unit bits 14..6 for the optional intermediate value.
983    // If these bits are 0, then there is no intermediate value.
984    // Otherwise, see the *NodeValue* constants below.
985    /*package*/ static final int kMinValueLead=kMinLinearMatch+kMaxLinearMatchLength;  // 0x0040
986    /*package*/ static final int kNodeTypeMask=kMinValueLead-1;  // 0x003f
987
988    // A final-value node has bit 15 set.
989    /*package*/ static final int kValueIsFinal=0x8000;
990
991    // Compact value: After testing and masking off bit 15, use the following thresholds.
992    /*package*/ static final int kMaxOneUnitValue=0x3fff;
993
994    /*package*/ static final int kMinTwoUnitValueLead=kMaxOneUnitValue+1;  // 0x4000
995    /*package*/ static final int kThreeUnitValueLead=0x7fff;
996
997    /*package*/ static final int kMaxTwoUnitValue=((kThreeUnitValueLead-kMinTwoUnitValueLead)<<16)-1;  // 0x3ffeffff
998
999    // Compact intermediate-value integer, lead unit shared with a branch or linear-match node.
1000    /*package*/ static final int kMaxOneUnitNodeValue=0xff;
1001    /*package*/ static final int kMinTwoUnitNodeValueLead=kMinValueLead+((kMaxOneUnitNodeValue+1)<<6);  // 0x4040
1002    /*package*/ static final int kThreeUnitNodeValueLead=0x7fc0;
1003
1004    /*package*/ static final int kMaxTwoUnitNodeValue=
1005        ((kThreeUnitNodeValueLead-kMinTwoUnitNodeValueLead)<<10)-1;  // 0xfdffff
1006
1007    // Compact delta integers.
1008    /*package*/ static final int kMaxOneUnitDelta=0xfbff;
1009    /*package*/ static final int kMinTwoUnitDeltaLead=kMaxOneUnitDelta+1;  // 0xfc00
1010    /*package*/ static final int kThreeUnitDeltaLead=0xffff;
1011
1012    /*package*/ static final int kMaxTwoUnitDelta=((kThreeUnitDeltaLead-kMinTwoUnitDeltaLead)<<16)-1;  // 0x03feffff
1013
1014    // Fixed value referencing the CharsTrie words.
1015    private CharSequence chars_;
1016    private int root_;
1017
1018    // Iterator variables.
1019
1020    // Pointer to next trie unit to read. NULL if no more matches.
1021    private int pos_;
1022    // Remaining length of a linear-match node, minus 1. Negative if not in such a node.
1023    private int remainingMatchLength_;
1024}
1025