164339d36f8bd4db5025fe2988eda22b491a9219cFredrik Roubert// Copyright (C) 2016 and later: Unicode, Inc. and others.
264339d36f8bd4db5025fe2988eda22b491a9219cFredrik Roubert// License & terms of use: http://www.unicode.org/copyright.html
3b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho/*
4b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho*******************************************************************************
583a171d1a62abf406f7f44ae671823d5ec20db7dCraig Cornelius*   Copyright (C) 2010-2012, International Business Machines
6b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho*   Corporation and others.  All Rights Reserved.
7b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho*******************************************************************************
8b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho*   file name:  ucharstrie.h
9b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho*   encoding:   US-ASCII
10b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho*   tab size:   8 (not used)
11b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho*   indentation:4
12b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho*
13b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho*   created on: 2010nov14
14b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho*   created by: Markus W. Scherer
15b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho*/
16b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho
17b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho#ifndef __UCHARSTRIE_H__
18b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho#define __UCHARSTRIE_H__
19b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho
20b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho/**
21b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho * \file
22b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho * \brief C++ API: Trie for mapping Unicode strings (or 16-bit-unit sequences)
23b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho *                 to integer values.
24b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho */
25b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho
26b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho#include "unicode/utypes.h"
27b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho#include "unicode/unistr.h"
28b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho#include "unicode/uobject.h"
29b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho#include "unicode/ustringtrie.h"
30b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho
31b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2clairehoU_NAMESPACE_BEGIN
32b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho
33b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2clairehoclass Appendable;
34b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2clairehoclass UCharsTrieBuilder;
35b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2clairehoclass UVector32;
36b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho
37b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho/**
38b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho * Light-weight, non-const reader class for a UCharsTrie.
39b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho * Traverses a UChar-serialized data structure with minimal state,
40b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho * for mapping strings (16-bit-unit sequences) to non-negative integer values.
41b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho *
42b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho * This class owns the serialized trie data only if it was constructed by
43b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho * the builder's build() method.
44b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho * The public constructor and the copy constructor only alias the data (only copy the pointer).
45b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho * There is no assignment operator.
46b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho *
47b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho * This class is not intended for public subclassing.
4883a171d1a62abf406f7f44ae671823d5ec20db7dCraig Cornelius * @stable ICU 4.8
49b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho */
50b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2clairehoclass U_COMMON_API UCharsTrie : public UMemory {
51b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2clairehopublic:
52b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho    /**
53b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho     * Constructs a UCharsTrie reader instance.
54b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho     *
55b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho     * The trieUChars must contain a copy of a UChar sequence from the UCharsTrieBuilder,
56b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho     * starting with the first UChar of that sequence.
57b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho     * The UCharsTrie object will not read more UChars than
58b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho     * the UCharsTrieBuilder generated in the corresponding build() call.
59b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho     *
60b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho     * The array is not copied/cloned and must not be modified while
61b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho     * the UCharsTrie object is in use.
62b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho     *
63b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho     * @param trieUChars The UChar array that contains the serialized trie.
6483a171d1a62abf406f7f44ae671823d5ec20db7dCraig Cornelius     * @stable ICU 4.8
65b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho     */
66b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho    UCharsTrie(const UChar *trieUChars)
67b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho            : ownedArray_(NULL), uchars_(trieUChars),
68b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho              pos_(uchars_), remainingMatchLength_(-1) {}
69b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho
70b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho    /**
71b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho     * Destructor.
7283a171d1a62abf406f7f44ae671823d5ec20db7dCraig Cornelius     * @stable ICU 4.8
73b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho     */
74b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho    ~UCharsTrie();
75b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho
76b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho    /**
77b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho     * Copy constructor, copies the other trie reader object and its state,
78b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho     * but not the UChar array which will be shared. (Shallow copy.)
79b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho     * @param other Another UCharsTrie object.
8083a171d1a62abf406f7f44ae671823d5ec20db7dCraig Cornelius     * @stable ICU 4.8
81b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho     */
82b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho    UCharsTrie(const UCharsTrie &other)
83b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho            : ownedArray_(NULL), uchars_(other.uchars_),
84b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho              pos_(other.pos_), remainingMatchLength_(other.remainingMatchLength_) {}
85b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho
86b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho    /**
87b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho     * Resets this trie to its initial state.
88b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho     * @return *this
8983a171d1a62abf406f7f44ae671823d5ec20db7dCraig Cornelius     * @stable ICU 4.8
90b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho     */
91b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho    UCharsTrie &reset() {
92b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho        pos_=uchars_;
93b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho        remainingMatchLength_=-1;
94b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho        return *this;
95b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho    }
96b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho
97b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho    /**
98b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho     * UCharsTrie state object, for saving a trie's current state
99b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho     * and resetting the trie back to this state later.
10083a171d1a62abf406f7f44ae671823d5ec20db7dCraig Cornelius     * @stable ICU 4.8
101b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho     */
102b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho    class State : public UMemory {
103b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho    public:
104b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho        /**
105b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho         * Constructs an empty State.
10683a171d1a62abf406f7f44ae671823d5ec20db7dCraig Cornelius         * @stable ICU 4.8
107b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho         */
108b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho        State() { uchars=NULL; }
109b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho    private:
110b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho        friend class UCharsTrie;
111b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho
112b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho        const UChar *uchars;
113b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho        const UChar *pos;
114b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho        int32_t remainingMatchLength;
115b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho    };
116b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho
117b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho    /**
118b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho     * Saves the state of this trie.
119b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho     * @param state The State object to hold the trie's state.
120b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho     * @return *this
121b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho     * @see resetToState
12283a171d1a62abf406f7f44ae671823d5ec20db7dCraig Cornelius     * @stable ICU 4.8
123b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho     */
124b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho    const UCharsTrie &saveState(State &state) const {
125b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho        state.uchars=uchars_;
126b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho        state.pos=pos_;
127b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho        state.remainingMatchLength=remainingMatchLength_;
128b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho        return *this;
129b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho    }
130b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho
131b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho    /**
132b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho     * Resets this trie to the saved state.
133b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho     * If the state object contains no state, or the state of a different trie,
134b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho     * then this trie remains unchanged.
135b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho     * @param state The State object which holds a saved trie state.
136b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho     * @return *this
137b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho     * @see saveState
138b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho     * @see reset
13983a171d1a62abf406f7f44ae671823d5ec20db7dCraig Cornelius     * @stable ICU 4.8
140b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho     */
141b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho    UCharsTrie &resetToState(const State &state) {
142b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho        if(uchars_==state.uchars && uchars_!=NULL) {
143b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho            pos_=state.pos;
144b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho            remainingMatchLength_=state.remainingMatchLength;
145b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho        }
146b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho        return *this;
147b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho    }
148b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho
149b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho    /**
150b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho     * Determines whether the string so far matches, whether it has a value,
151b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho     * and whether another input UChar can continue a matching string.
152b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho     * @return The match/value Result.
15383a171d1a62abf406f7f44ae671823d5ec20db7dCraig Cornelius     * @stable ICU 4.8
154b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho     */
155b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho    UStringTrieResult current() const;
156b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho
157b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho    /**
158b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho     * Traverses the trie from the initial state for this input UChar.
159b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho     * Equivalent to reset().next(uchar).
160b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho     * @param uchar Input char value. Values below 0 and above 0xffff will never match.
161b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho     * @return The match/value Result.
16283a171d1a62abf406f7f44ae671823d5ec20db7dCraig Cornelius     * @stable ICU 4.8
163b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho     */
164b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho    inline UStringTrieResult first(int32_t uchar) {
165b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho        remainingMatchLength_=-1;
166b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho        return nextImpl(uchars_, uchar);
167b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho    }
168b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho
169b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho    /**
170b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho     * Traverses the trie from the initial state for the
171b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho     * one or two UTF-16 code units for this input code point.
172b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho     * Equivalent to reset().nextForCodePoint(cp).
173b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho     * @param cp A Unicode code point 0..0x10ffff.
174b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho     * @return The match/value Result.
17583a171d1a62abf406f7f44ae671823d5ec20db7dCraig Cornelius     * @stable ICU 4.8
176b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho     */
17783a171d1a62abf406f7f44ae671823d5ec20db7dCraig Cornelius    UStringTrieResult firstForCodePoint(UChar32 cp);
178b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho
179b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho    /**
180b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho     * Traverses the trie from the current state for this input UChar.
181b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho     * @param uchar Input char value. Values below 0 and above 0xffff will never match.
182b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho     * @return The match/value Result.
18383a171d1a62abf406f7f44ae671823d5ec20db7dCraig Cornelius     * @stable ICU 4.8
184b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho     */
185b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho    UStringTrieResult next(int32_t uchar);
186b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho
187b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho    /**
188b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho     * Traverses the trie from the current state for the
189b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho     * one or two UTF-16 code units for this input code point.
190b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho     * @param cp A Unicode code point 0..0x10ffff.
191b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho     * @return The match/value Result.
19283a171d1a62abf406f7f44ae671823d5ec20db7dCraig Cornelius     * @stable ICU 4.8
193b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho     */
19483a171d1a62abf406f7f44ae671823d5ec20db7dCraig Cornelius    UStringTrieResult nextForCodePoint(UChar32 cp);
195b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho
196b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho    /**
197b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho     * Traverses the trie from the current state for this string.
198b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho     * Equivalent to
199b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho     * \code
200b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho     * Result result=current();
201b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho     * for(each c in s)
202b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho     *   if(!USTRINGTRIE_HAS_NEXT(result)) return USTRINGTRIE_NO_MATCH;
203b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho     *   result=next(c);
204b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho     * return result;
205b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho     * \endcode
206b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho     * @param s A string. Can be NULL if length is 0.
207b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho     * @param length The length of the string. Can be -1 if NUL-terminated.
208b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho     * @return The match/value Result.
20983a171d1a62abf406f7f44ae671823d5ec20db7dCraig Cornelius     * @stable ICU 4.8
210b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho     */
211b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho    UStringTrieResult next(const UChar *s, int32_t length);
212b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho
213b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho    /**
214b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho     * Returns a matching string's value if called immediately after
215b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho     * current()/first()/next() returned USTRINGTRIE_INTERMEDIATE_VALUE or USTRINGTRIE_FINAL_VALUE.
216b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho     * getValue() can be called multiple times.
217b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho     *
218b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho     * Do not call getValue() after USTRINGTRIE_NO_MATCH or USTRINGTRIE_NO_VALUE!
219b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho     * @return The value for the string so far.
22083a171d1a62abf406f7f44ae671823d5ec20db7dCraig Cornelius     * @stable ICU 4.8
221b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho     */
222b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho    inline int32_t getValue() const {
223b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho        const UChar *pos=pos_;
224b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho        int32_t leadUnit=*pos++;
225b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho        // U_ASSERT(leadUnit>=kMinValueLead);
226b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho        return leadUnit&kValueIsFinal ?
227b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho            readValue(pos, leadUnit&0x7fff) : readNodeValue(pos, leadUnit);
228b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho    }
229b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho
230b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho    /**
231b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho     * Determines whether all strings reachable from the current state
232b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho     * map to the same value.
233b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho     * @param uniqueValue Receives the unique value, if this function returns TRUE.
234b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho     *                    (output-only)
235b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho     * @return TRUE if all strings reachable from the current state
236b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho     *         map to the same value.
23783a171d1a62abf406f7f44ae671823d5ec20db7dCraig Cornelius     * @stable ICU 4.8
238b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho     */
239b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho    inline UBool hasUniqueValue(int32_t &uniqueValue) const {
240b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho        const UChar *pos=pos_;
241b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho        // Skip the rest of a pending linear-match node.
242b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho        return pos!=NULL && findUniqueValue(pos+remainingMatchLength_+1, FALSE, uniqueValue);
243b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho    }
244b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho
245b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho    /**
246b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho     * Finds each UChar which continues the string from the current state.
247b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho     * That is, each UChar c for which it would be next(c)!=USTRINGTRIE_NO_MATCH now.
248b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho     * @param out Each next UChar is appended to this object.
249b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho     * @return the number of UChars which continue the string from here
25083a171d1a62abf406f7f44ae671823d5ec20db7dCraig Cornelius     * @stable ICU 4.8
251b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho     */
252b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho    int32_t getNextUChars(Appendable &out) const;
253b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho
254b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho    /**
255b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho     * Iterator for all of the (string, value) pairs in a UCharsTrie.
25683a171d1a62abf406f7f44ae671823d5ec20db7dCraig Cornelius     * @stable ICU 4.8
257b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho     */
258b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho    class U_COMMON_API Iterator : public UMemory {
259b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho    public:
260b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho        /**
261b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho         * Iterates from the root of a UChar-serialized UCharsTrie.
262b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho         * @param trieUChars The trie UChars.
263b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho         * @param maxStringLength If 0, the iterator returns full strings.
264b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho         *                        Otherwise, the iterator returns strings with this maximum length.
265b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho         * @param errorCode Standard ICU error code. Its input value must
266b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho         *                  pass the U_SUCCESS() test, or else the function returns
267b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho         *                  immediately. Check for U_FAILURE() on output or use with
268b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho         *                  function chaining. (See User Guide for details.)
26983a171d1a62abf406f7f44ae671823d5ec20db7dCraig Cornelius         * @stable ICU 4.8
270b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho         */
271b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho        Iterator(const UChar *trieUChars, int32_t maxStringLength, UErrorCode &errorCode);
272b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho
273b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho        /**
274b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho         * Iterates from the current state of the specified UCharsTrie.
275b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho         * @param trie The trie whose state will be copied for iteration.
276b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho         * @param maxStringLength If 0, the iterator returns full strings.
277b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho         *                        Otherwise, the iterator returns strings with this maximum length.
278b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho         * @param errorCode Standard ICU error code. Its input value must
279b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho         *                  pass the U_SUCCESS() test, or else the function returns
280b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho         *                  immediately. Check for U_FAILURE() on output or use with
281b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho         *                  function chaining. (See User Guide for details.)
28283a171d1a62abf406f7f44ae671823d5ec20db7dCraig Cornelius         * @stable ICU 4.8
283b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho         */
284b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho        Iterator(const UCharsTrie &trie, int32_t maxStringLength, UErrorCode &errorCode);
285b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho
286b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho        /**
287b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho         * Destructor.
28883a171d1a62abf406f7f44ae671823d5ec20db7dCraig Cornelius         * @stable ICU 4.8
289b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho         */
290b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho        ~Iterator();
291b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho
292b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho        /**
293b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho         * Resets this iterator to its initial state.
294b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho         * @return *this
29583a171d1a62abf406f7f44ae671823d5ec20db7dCraig Cornelius         * @stable ICU 4.8
296b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho         */
297b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho        Iterator &reset();
298b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho
299b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho        /**
300b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho         * @return TRUE if there are more elements.
30183a171d1a62abf406f7f44ae671823d5ec20db7dCraig Cornelius         * @stable ICU 4.8
302b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho         */
303b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho        UBool hasNext() const;
304b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho
305b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho        /**
306b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho         * Finds the next (string, value) pair if there is one.
307b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho         *
308b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho         * If the string is truncated to the maximum length and does not
309b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho         * have a real value, then the value is set to -1.
310b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho         * In this case, this "not a real value" is indistinguishable from
311b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho         * a real value of -1.
312b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho         * @param errorCode Standard ICU error code. Its input value must
313b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho         *                  pass the U_SUCCESS() test, or else the function returns
314b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho         *                  immediately. Check for U_FAILURE() on output or use with
315b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho         *                  function chaining. (See User Guide for details.)
316b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho         * @return TRUE if there is another element.
31783a171d1a62abf406f7f44ae671823d5ec20db7dCraig Cornelius         * @stable ICU 4.8
318b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho         */
319b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho        UBool next(UErrorCode &errorCode);
320b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho
321b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho        /**
322b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho         * @return The string for the last successful next().
32383a171d1a62abf406f7f44ae671823d5ec20db7dCraig Cornelius         * @stable ICU 4.8
324b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho         */
325b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho        const UnicodeString &getString() const { return str_; }
326b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho        /**
327b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho         * @return The value for the last successful next().
32883a171d1a62abf406f7f44ae671823d5ec20db7dCraig Cornelius         * @stable ICU 4.8
329b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho         */
330b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho        int32_t getValue() const { return value_; }
331b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho
332b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho    private:
333b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho        UBool truncateAndStop() {
334b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho            pos_=NULL;
335b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho            value_=-1;  // no real value for str
336b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho            return TRUE;
337b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho        }
338b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho
339b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho        const UChar *branchNext(const UChar *pos, int32_t length, UErrorCode &errorCode);
340b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho
341b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho        const UChar *uchars_;
342b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho        const UChar *pos_;
343b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho        const UChar *initialPos_;
344b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho        int32_t remainingMatchLength_;
345b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho        int32_t initialRemainingMatchLength_;
346b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho        UBool skipValue_;  // Skip intermediate value which was already delivered.
347b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho
348b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho        UnicodeString str_;
349b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho        int32_t maxLength_;
350b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho        int32_t value_;
351b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho
352b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho        // The stack stores pairs of integers for backtracking to another
353b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho        // outbound edge of a branch node.
354b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho        // The first integer is an offset from uchars_.
355b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho        // The second integer has the str_.length() from before the node in bits 15..0,
356b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho        // and the remaining branch length in bits 31..16.
357b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho        // (We could store the remaining branch length minus 1 in bits 30..16 and not use the sign bit,
358b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho        // but the code looks more confusing that way.)
359b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho        UVector32 *stack_;
360b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho    };
361b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho
362b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2clairehoprivate:
363b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho    friend class UCharsTrieBuilder;
364b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho
365b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho    /**
366b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho     * Constructs a UCharsTrie reader instance.
367b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho     * Unlike the public constructor which just aliases an array,
368b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho     * this constructor adopts the builder's array.
369b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho     * This constructor is only called by the builder.
370b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho     */
371b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho    UCharsTrie(UChar *adoptUChars, const UChar *trieUChars)
372b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho            : ownedArray_(adoptUChars), uchars_(trieUChars),
373b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho              pos_(uchars_), remainingMatchLength_(-1) {}
374b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho
375b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho    // No assignment operator.
376b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho    UCharsTrie &operator=(const UCharsTrie &other);
377b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho
378b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho    inline void stop() {
379b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho        pos_=NULL;
380b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho    }
381b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho
382b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho    // Reads a compact 32-bit integer.
383b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho    // pos is already after the leadUnit, and the lead unit has bit 15 reset.
384b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho    static inline int32_t readValue(const UChar *pos, int32_t leadUnit) {
385b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho        int32_t value;
386b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho        if(leadUnit<kMinTwoUnitValueLead) {
387b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho            value=leadUnit;
388b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho        } else if(leadUnit<kThreeUnitValueLead) {
389b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho            value=((leadUnit-kMinTwoUnitValueLead)<<16)|*pos;
390b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho        } else {
391b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho            value=(pos[0]<<16)|pos[1];
392b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho        }
393b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho        return value;
394b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho    }
395b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho    static inline const UChar *skipValue(const UChar *pos, int32_t leadUnit) {
396b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho        if(leadUnit>=kMinTwoUnitValueLead) {
397b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho            if(leadUnit<kThreeUnitValueLead) {
398b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho                ++pos;
399b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho            } else {
400b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho                pos+=2;
401b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho            }
402b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho        }
403b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho        return pos;
404b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho    }
405b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho    static inline const UChar *skipValue(const UChar *pos) {
406b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho        int32_t leadUnit=*pos++;
407b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho        return skipValue(pos, leadUnit&0x7fff);
408b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho    }
409b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho
410b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho    static inline int32_t readNodeValue(const UChar *pos, int32_t leadUnit) {
411b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho        // U_ASSERT(kMinValueLead<=leadUnit && leadUnit<kValueIsFinal);
412b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho        int32_t value;
413b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho        if(leadUnit<kMinTwoUnitNodeValueLead) {
414b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho            value=(leadUnit>>6)-1;
415b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho        } else if(leadUnit<kThreeUnitNodeValueLead) {
416b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho            value=(((leadUnit&0x7fc0)-kMinTwoUnitNodeValueLead)<<10)|*pos;
417b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho        } else {
418b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho            value=(pos[0]<<16)|pos[1];
419b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho        }
420b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho        return value;
421b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho    }
422b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho    static inline const UChar *skipNodeValue(const UChar *pos, int32_t leadUnit) {
423b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho        // U_ASSERT(kMinValueLead<=leadUnit && leadUnit<kValueIsFinal);
424b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho        if(leadUnit>=kMinTwoUnitNodeValueLead) {
425b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho            if(leadUnit<kThreeUnitNodeValueLead) {
426b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho                ++pos;
427b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho            } else {
428b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho                pos+=2;
429b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho            }
430b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho        }
431b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho        return pos;
432b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho    }
433b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho
434b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho    static inline const UChar *jumpByDelta(const UChar *pos) {
435b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho        int32_t delta=*pos++;
436b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho        if(delta>=kMinTwoUnitDeltaLead) {
437b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho            if(delta==kThreeUnitDeltaLead) {
438b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho                delta=(pos[0]<<16)|pos[1];
439b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho                pos+=2;
440b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho            } else {
441b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho                delta=((delta-kMinTwoUnitDeltaLead)<<16)|*pos++;
442b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho            }
443b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho        }
444b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho        return pos+delta;
445b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho    }
446b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho
447b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho    static const UChar *skipDelta(const UChar *pos) {
448b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho        int32_t delta=*pos++;
449b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho        if(delta>=kMinTwoUnitDeltaLead) {
450b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho            if(delta==kThreeUnitDeltaLead) {
451b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho                pos+=2;
452b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho            } else {
453b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho                ++pos;
454b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho            }
455b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho        }
456b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho        return pos;
457b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho    }
458b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho
459b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho    static inline UStringTrieResult valueResult(int32_t node) {
460b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho        return (UStringTrieResult)(USTRINGTRIE_INTERMEDIATE_VALUE-(node>>15));
461b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho    }
462b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho
463b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho    // Handles a branch node for both next(uchar) and next(string).
464b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho    UStringTrieResult branchNext(const UChar *pos, int32_t length, int32_t uchar);
465b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho
466b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho    // Requires remainingLength_<0.
467b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho    UStringTrieResult nextImpl(const UChar *pos, int32_t uchar);
468b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho
469b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho    // Helper functions for hasUniqueValue().
470b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho    // Recursively finds a unique value (or whether there is not a unique one)
471b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho    // from a branch.
472b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho    static const UChar *findUniqueValueFromBranch(const UChar *pos, int32_t length,
473b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho                                                  UBool haveUniqueValue, int32_t &uniqueValue);
474b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho    // Recursively finds a unique value (or whether there is not a unique one)
475b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho    // starting from a position on a node lead unit.
476b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho    static UBool findUniqueValue(const UChar *pos, UBool haveUniqueValue, int32_t &uniqueValue);
477b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho
478b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho    // Helper functions for getNextUChars().
479b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho    // getNextUChars() when pos is on a branch node.
480b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho    static void getNextBranchUChars(const UChar *pos, int32_t length, Appendable &out);
481b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho
482b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho    // UCharsTrie data structure
483b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho    //
484b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho    // The trie consists of a series of UChar-serialized nodes for incremental
485b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho    // Unicode string/UChar sequence matching. (UChar=16-bit unsigned integer)
486b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho    // The root node is at the beginning of the trie data.
487b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho    //
488b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho    // Types of nodes are distinguished by their node lead unit ranges.
489b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho    // After each node, except a final-value node, another node follows to
490b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho    // encode match values or continue matching further units.
491b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho    //
492b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho    // Node types:
493b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho    //  - Final-value node: Stores a 32-bit integer in a compact, variable-length format.
494b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho    //    The value is for the string/UChar sequence so far.
495b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho    //  - Match node, optionally with an intermediate value in a different compact format.
496b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho    //    The value, if present, is for the string/UChar sequence so far.
497b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho    //
498b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho    //  Aside from the value, which uses the node lead unit's high bits:
499b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho    //
500b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho    //  - Linear-match node: Matches a number of units.
501b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho    //  - Branch node: Branches to other nodes according to the current input unit.
502b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho    //    The node unit is the length of the branch (number of units to select from)
503b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho    //    minus 1. It is followed by a sub-node:
504b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho    //    - If the length is at most kMaxBranchLinearSubNodeLength, then
505b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho    //      there are length-1 (key, value) pairs and then one more comparison unit.
506b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho    //      If one of the key units matches, then the value is either a final value for
507b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho    //      the string so far, or a "jump" delta to the next node.
508b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho    //      If the last unit matches, then matching continues with the next node.
509b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho    //      (Values have the same encoding as final-value nodes.)
510b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho    //    - If the length is greater than kMaxBranchLinearSubNodeLength, then
511b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho    //      there is one unit and one "jump" delta.
512b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho    //      If the input unit is less than the sub-node unit, then "jump" by delta to
513b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho    //      the next sub-node which will have a length of length/2.
514b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho    //      (The delta has its own compact encoding.)
515b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho    //      Otherwise, skip the "jump" delta to the next sub-node
516b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho    //      which will have a length of length-length/2.
517b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho
518b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho    // Match-node lead unit values, after masking off intermediate-value bits:
519b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho
520b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho    // 0000..002f: Branch node. If node!=0 then the length is node+1, otherwise
521b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho    // the length is one more than the next unit.
522b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho
523b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho    // For a branch sub-node with at most this many entries, we drop down
524b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho    // to a linear search.
525b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho    static const int32_t kMaxBranchLinearSubNodeLength=5;
526b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho
527b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho    // 0030..003f: Linear-match node, match 1..16 units and continue reading the next node.
528b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho    static const int32_t kMinLinearMatch=0x30;
529b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho    static const int32_t kMaxLinearMatchLength=0x10;
530b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho
531b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho    // Match-node lead unit bits 14..6 for the optional intermediate value.
532b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho    // If these bits are 0, then there is no intermediate value.
533b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho    // Otherwise, see the *NodeValue* constants below.
534b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho    static const int32_t kMinValueLead=kMinLinearMatch+kMaxLinearMatchLength;  // 0x0040
535b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho    static const int32_t kNodeTypeMask=kMinValueLead-1;  // 0x003f
536b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho
537b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho    // A final-value node has bit 15 set.
538b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho    static const int32_t kValueIsFinal=0x8000;
539b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho
540b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho    // Compact value: After testing and masking off bit 15, use the following thresholds.
541b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho    static const int32_t kMaxOneUnitValue=0x3fff;
542b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho
543b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho    static const int32_t kMinTwoUnitValueLead=kMaxOneUnitValue+1;  // 0x4000
544b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho    static const int32_t kThreeUnitValueLead=0x7fff;
545b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho
546b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho    static const int32_t kMaxTwoUnitValue=((kThreeUnitValueLead-kMinTwoUnitValueLead)<<16)-1;  // 0x3ffeffff
547b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho
548b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho    // Compact intermediate-value integer, lead unit shared with a branch or linear-match node.
549b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho    static const int32_t kMaxOneUnitNodeValue=0xff;
550b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho    static const int32_t kMinTwoUnitNodeValueLead=kMinValueLead+((kMaxOneUnitNodeValue+1)<<6);  // 0x4040
551b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho    static const int32_t kThreeUnitNodeValueLead=0x7fc0;
552b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho
553b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho    static const int32_t kMaxTwoUnitNodeValue=
554b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho        ((kThreeUnitNodeValueLead-kMinTwoUnitNodeValueLead)<<10)-1;  // 0xfdffff
555b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho
556b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho    // Compact delta integers.
557b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho    static const int32_t kMaxOneUnitDelta=0xfbff;
558b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho    static const int32_t kMinTwoUnitDeltaLead=kMaxOneUnitDelta+1;  // 0xfc00
559b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho    static const int32_t kThreeUnitDeltaLead=0xffff;
560b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho
561b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho    static const int32_t kMaxTwoUnitDelta=((kThreeUnitDeltaLead-kMinTwoUnitDeltaLead)<<16)-1;  // 0x03feffff
562b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho
563b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho    UChar *ownedArray_;
564b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho
565b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho    // Fixed value referencing the UCharsTrie words.
566b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho    const UChar *uchars_;
567b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho
568b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho    // Iterator variables.
569b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho
570b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho    // Pointer to next trie unit to read. NULL if no more matches.
571b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho    const UChar *pos_;
572b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho    // Remaining length of a linear-match node, minus 1. Negative if not in such a node.
573b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho    int32_t remainingMatchLength_;
574b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho};
575b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho
576b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2clairehoU_NAMESPACE_END
577b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho
578b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho#endif  // __UCHARSTRIE_H__
579