1b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho/*
2b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho*******************************************************************************
3103e9ffba2cba345d0078eb8b8db33249f81840aCraig Cornelius*   Copyright (C) 2010-2012, International Business Machines
4b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho*   Corporation and others.  All Rights Reserved.
5b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho*******************************************************************************
6b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho*   file name:  bytestrie.h
7b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho*   encoding:   US-ASCII
8b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho*   tab size:   8 (not used)
9b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho*   indentation:4
10b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho*
11b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho*   created on: 2010sep25
12b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho*   created by: Markus W. Scherer
13b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho*/
14b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho
15b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho#ifndef __BYTESTRIE_H__
16b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho#define __BYTESTRIE_H__
17b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho
18b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho/**
19b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho * \file
20b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho * \brief C++ API: Trie for mapping byte sequences to integer values.
21b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho */
22b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho
23b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho#include "unicode/utypes.h"
24b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho#include "unicode/stringpiece.h"
25b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho#include "unicode/uobject.h"
26b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho#include "unicode/ustringtrie.h"
27b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho
28b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2clairehoU_NAMESPACE_BEGIN
29b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho
30b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2clairehoclass ByteSink;
31b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2clairehoclass BytesTrieBuilder;
32b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2clairehoclass CharString;
33b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2clairehoclass UVector32;
34b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho
35b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho/**
36b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho * Light-weight, non-const reader class for a BytesTrie.
37b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho * Traverses a byte-serialized data structure with minimal state,
38b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho * for mapping byte sequences to non-negative integer values.
39b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho *
40b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho * This class owns the serialized trie data only if it was constructed by
41b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho * the builder's build() method.
42b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho * The public constructor and the copy constructor only alias the data (only copy the pointer).
43b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho * There is no assignment operator.
44b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho *
45b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho * This class is not intended for public subclassing.
46103e9ffba2cba345d0078eb8b8db33249f81840aCraig Cornelius * @stable ICU 4.8
47b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho */
48b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2clairehoclass U_COMMON_API BytesTrie : public UMemory {
49b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2clairehopublic:
50b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho    /**
51b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho     * Constructs a BytesTrie reader instance.
52b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho     *
53b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho     * The trieBytes must contain a copy of a byte sequence from the BytesTrieBuilder,
54b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho     * starting with the first byte of that sequence.
55b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho     * The BytesTrie object will not read more bytes than
56b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho     * the BytesTrieBuilder generated in the corresponding build() call.
57b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho     *
58b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho     * The array is not copied/cloned and must not be modified while
59b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho     * the BytesTrie object is in use.
60b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho     *
61b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho     * @param trieBytes The byte array that contains the serialized trie.
62103e9ffba2cba345d0078eb8b8db33249f81840aCraig Cornelius     * @stable ICU 4.8
63b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho     */
64b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho    BytesTrie(const void *trieBytes)
6554dcd9b6a06071f647dac967e9e267abb9410720Craig Cornelius            : ownedArray_(NULL), bytes_(static_cast<const uint8_t *>(trieBytes)),
66b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho              pos_(bytes_), remainingMatchLength_(-1) {}
67b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho
68b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho    /**
69b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho     * Destructor.
70103e9ffba2cba345d0078eb8b8db33249f81840aCraig Cornelius     * @stable ICU 4.8
71b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho     */
72b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho    ~BytesTrie();
73b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho
74b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho    /**
75b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho     * Copy constructor, copies the other trie reader object and its state,
76b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho     * but not the byte array which will be shared. (Shallow copy.)
77b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho     * @param other Another BytesTrie object.
78103e9ffba2cba345d0078eb8b8db33249f81840aCraig Cornelius     * @stable ICU 4.8
79b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho     */
80b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho    BytesTrie(const BytesTrie &other)
81b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho            : ownedArray_(NULL), bytes_(other.bytes_),
82b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho              pos_(other.pos_), remainingMatchLength_(other.remainingMatchLength_) {}
83b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho
84b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho    /**
85b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho     * Resets this trie to its initial state.
86b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho     * @return *this
87103e9ffba2cba345d0078eb8b8db33249f81840aCraig Cornelius     * @stable ICU 4.8
88b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho     */
89b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho    BytesTrie &reset() {
90b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho        pos_=bytes_;
91b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho        remainingMatchLength_=-1;
92b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho        return *this;
93b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho    }
94b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho
95b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho    /**
96b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho     * BytesTrie state object, for saving a trie's current state
97b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho     * and resetting the trie back to this state later.
98103e9ffba2cba345d0078eb8b8db33249f81840aCraig Cornelius     * @stable ICU 4.8
99b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho     */
100b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho    class State : public UMemory {
101b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho    public:
102b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho        /**
103b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho         * Constructs an empty State.
104103e9ffba2cba345d0078eb8b8db33249f81840aCraig Cornelius         * @stable ICU 4.8
105b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho         */
106b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho        State() { bytes=NULL; }
107b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho    private:
108b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho        friend class BytesTrie;
109b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho
110b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho        const uint8_t *bytes;
111b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho        const uint8_t *pos;
112b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho        int32_t remainingMatchLength;
113b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho    };
114b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho
115b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho    /**
116b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho     * Saves the state of this trie.
117b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho     * @param state The State object to hold the trie's state.
118b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho     * @return *this
119b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho     * @see resetToState
120103e9ffba2cba345d0078eb8b8db33249f81840aCraig Cornelius     * @stable ICU 4.8
121b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho     */
122b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho    const BytesTrie &saveState(State &state) const {
123b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho        state.bytes=bytes_;
124b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho        state.pos=pos_;
125b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho        state.remainingMatchLength=remainingMatchLength_;
126b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho        return *this;
127b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho    }
128b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho
129b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho    /**
130b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho     * Resets this trie to the saved state.
131b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho     * If the state object contains no state, or the state of a different trie,
132b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho     * then this trie remains unchanged.
133b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho     * @param state The State object which holds a saved trie state.
134b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho     * @return *this
135b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho     * @see saveState
136b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho     * @see reset
137103e9ffba2cba345d0078eb8b8db33249f81840aCraig Cornelius     * @stable ICU 4.8
138b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho     */
139b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho    BytesTrie &resetToState(const State &state) {
140b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho        if(bytes_==state.bytes && bytes_!=NULL) {
141b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho            pos_=state.pos;
142b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho            remainingMatchLength_=state.remainingMatchLength;
143b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho        }
144b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho        return *this;
145b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho    }
146b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho
147b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho    /**
148b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho     * Determines whether the byte sequence so far matches, whether it has a value,
149b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho     * and whether another input byte can continue a matching byte sequence.
150b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho     * @return The match/value Result.
151103e9ffba2cba345d0078eb8b8db33249f81840aCraig Cornelius     * @stable ICU 4.8
152b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho     */
153b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho    UStringTrieResult current() const;
154b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho
155b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho    /**
156b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho     * Traverses the trie from the initial state for this input byte.
157b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho     * Equivalent to reset().next(inByte).
158b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho     * @param inByte Input byte value. Values -0x100..-1 are treated like 0..0xff.
159b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho     *               Values below -0x100 and above 0xff will never match.
160b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho     * @return The match/value Result.
161103e9ffba2cba345d0078eb8b8db33249f81840aCraig Cornelius     * @stable ICU 4.8
162b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho     */
163b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho    inline UStringTrieResult first(int32_t inByte) {
164b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho        remainingMatchLength_=-1;
165b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho        if(inByte<0) {
166b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho            inByte+=0x100;
167b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho        }
168b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho        return nextImpl(bytes_, inByte);
169b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho    }
170b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho
171b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho    /**
172b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho     * Traverses the trie from the current state for this input byte.
173b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho     * @param inByte Input byte value. Values -0x100..-1 are treated like 0..0xff.
174b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho     *               Values below -0x100 and above 0xff will never match.
175b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho     * @return The match/value Result.
176103e9ffba2cba345d0078eb8b8db33249f81840aCraig Cornelius     * @stable ICU 4.8
177b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho     */
178b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho    UStringTrieResult next(int32_t inByte);
179b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho
180b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho    /**
181b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho     * Traverses the trie from the current state for this byte sequence.
182b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho     * Equivalent to
183b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho     * \code
184b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho     * Result result=current();
185b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho     * for(each c in s)
186b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho     *   if(!USTRINGTRIE_HAS_NEXT(result)) return USTRINGTRIE_NO_MATCH;
187b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho     *   result=next(c);
188b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho     * return result;
189b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho     * \endcode
190b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho     * @param s A string or byte sequence. Can be NULL if length is 0.
191b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho     * @param length The length of the byte sequence. Can be -1 if NUL-terminated.
192b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho     * @return The match/value Result.
193103e9ffba2cba345d0078eb8b8db33249f81840aCraig Cornelius     * @stable ICU 4.8
194b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho     */
195b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho    UStringTrieResult next(const char *s, int32_t length);
196b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho
197b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho    /**
198b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho     * Returns a matching byte sequence's value if called immediately after
199b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho     * current()/first()/next() returned USTRINGTRIE_INTERMEDIATE_VALUE or USTRINGTRIE_FINAL_VALUE.
200b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho     * getValue() can be called multiple times.
201b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho     *
202b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho     * Do not call getValue() after USTRINGTRIE_NO_MATCH or USTRINGTRIE_NO_VALUE!
203b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho     * @return The value for the byte sequence so far.
204103e9ffba2cba345d0078eb8b8db33249f81840aCraig Cornelius     * @stable ICU 4.8
205b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho     */
206b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho    inline int32_t getValue() const {
207b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho        const uint8_t *pos=pos_;
208b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho        int32_t leadByte=*pos++;
209b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho        // U_ASSERT(leadByte>=kMinValueLead);
210b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho        return readValue(pos, leadByte>>1);
211b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho    }
212b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho
213b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho    /**
214b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho     * Determines whether all byte sequences reachable from the current state
215b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho     * map to the same value.
216b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho     * @param uniqueValue Receives the unique value, if this function returns TRUE.
217b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho     *                    (output-only)
218b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho     * @return TRUE if all byte sequences reachable from the current state
219b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho     *         map to the same value.
220103e9ffba2cba345d0078eb8b8db33249f81840aCraig Cornelius     * @stable ICU 4.8
221b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho     */
222b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho    inline UBool hasUniqueValue(int32_t &uniqueValue) const {
223b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho        const uint8_t *pos=pos_;
224b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho        // Skip the rest of a pending linear-match node.
225b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho        return pos!=NULL && findUniqueValue(pos+remainingMatchLength_+1, FALSE, uniqueValue);
226b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho    }
227b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho
228b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho    /**
229b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho     * Finds each byte which continues the byte sequence from the current state.
230b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho     * That is, each byte b for which it would be next(b)!=USTRINGTRIE_NO_MATCH now.
231b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho     * @param out Each next byte is appended to this object.
232b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho     *            (Only uses the out.Append(s, length) method.)
233b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho     * @return the number of bytes which continue the byte sequence from here
234103e9ffba2cba345d0078eb8b8db33249f81840aCraig Cornelius     * @stable ICU 4.8
235b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho     */
236b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho    int32_t getNextBytes(ByteSink &out) const;
237b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho
238b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho    /**
239b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho     * Iterator for all of the (byte sequence, value) pairs in a BytesTrie.
240103e9ffba2cba345d0078eb8b8db33249f81840aCraig Cornelius     * @stable ICU 4.8
241b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho     */
242b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho    class U_COMMON_API Iterator : public UMemory {
243b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho    public:
244b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho        /**
245b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho         * Iterates from the root of a byte-serialized BytesTrie.
246b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho         * @param trieBytes The trie bytes.
247b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho         * @param maxStringLength If 0, the iterator returns full strings/byte sequences.
248b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho         *                        Otherwise, the iterator returns strings with this maximum length.
249b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho         * @param errorCode Standard ICU error code. Its input value must
250b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho         *                  pass the U_SUCCESS() test, or else the function returns
251b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho         *                  immediately. Check for U_FAILURE() on output or use with
252b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho         *                  function chaining. (See User Guide for details.)
253103e9ffba2cba345d0078eb8b8db33249f81840aCraig Cornelius         * @stable ICU 4.8
254b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho         */
255b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho        Iterator(const void *trieBytes, int32_t maxStringLength, UErrorCode &errorCode);
256b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho
257b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho        /**
258b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho         * Iterates from the current state of the specified BytesTrie.
259b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho         * @param trie The trie whose state will be copied for iteration.
260b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho         * @param maxStringLength If 0, the iterator returns full strings/byte sequences.
261b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho         *                        Otherwise, the iterator returns strings with this maximum length.
262b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho         * @param errorCode Standard ICU error code. Its input value must
263b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho         *                  pass the U_SUCCESS() test, or else the function returns
264b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho         *                  immediately. Check for U_FAILURE() on output or use with
265b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho         *                  function chaining. (See User Guide for details.)
266103e9ffba2cba345d0078eb8b8db33249f81840aCraig Cornelius         * @stable ICU 4.8
267b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho         */
268b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho        Iterator(const BytesTrie &trie, int32_t maxStringLength, UErrorCode &errorCode);
269b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho
270b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho        /**
271b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho         * Destructor.
272103e9ffba2cba345d0078eb8b8db33249f81840aCraig Cornelius         * @stable ICU 4.8
273b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho         */
274b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho        ~Iterator();
275b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho
276b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho        /**
277b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho         * Resets this iterator to its initial state.
278b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho         * @return *this
279103e9ffba2cba345d0078eb8b8db33249f81840aCraig Cornelius         * @stable ICU 4.8
280b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho         */
281b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho        Iterator &reset();
282b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho
283b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho        /**
284b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho         * @return TRUE if there are more elements.
285103e9ffba2cba345d0078eb8b8db33249f81840aCraig Cornelius         * @stable ICU 4.8
286b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho         */
287b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho        UBool hasNext() const;
288b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho
289b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho        /**
290b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho         * Finds the next (byte sequence, value) pair if there is one.
291b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho         *
292b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho         * If the byte sequence is truncated to the maximum length and does not
293b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho         * have a real value, then the value is set to -1.
294b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho         * In this case, this "not a real value" is indistinguishable from
295b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho         * a real value of -1.
296b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho         * @param errorCode Standard ICU error code. Its input value must
297b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho         *                  pass the U_SUCCESS() test, or else the function returns
298b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho         *                  immediately. Check for U_FAILURE() on output or use with
299b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho         *                  function chaining. (See User Guide for details.)
300b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho         * @return TRUE if there is another element.
301103e9ffba2cba345d0078eb8b8db33249f81840aCraig Cornelius         * @stable ICU 4.8
302b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho         */
303b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho        UBool next(UErrorCode &errorCode);
304b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho
305b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho        /**
306b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho         * @return The NUL-terminated byte sequence for the last successful next().
307103e9ffba2cba345d0078eb8b8db33249f81840aCraig Cornelius         * @stable ICU 4.8
308b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho         */
309b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho        const StringPiece &getString() const { return sp_; }
310b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho        /**
311b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho         * @return The value for the last successful next().
312103e9ffba2cba345d0078eb8b8db33249f81840aCraig Cornelius         * @stable ICU 4.8
313b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho         */
314b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho        int32_t getValue() const { return value_; }
315b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho
316b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho    private:
317b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho        UBool truncateAndStop();
318b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho
319b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho        const uint8_t *branchNext(const uint8_t *pos, int32_t length, UErrorCode &errorCode);
320b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho
321b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho        const uint8_t *bytes_;
322b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho        const uint8_t *pos_;
323b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho        const uint8_t *initialPos_;
324b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho        int32_t remainingMatchLength_;
325b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho        int32_t initialRemainingMatchLength_;
326b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho
327b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho        CharString *str_;
328b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho        StringPiece sp_;
329b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho        int32_t maxLength_;
330b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho        int32_t value_;
331b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho
332b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho        // The stack stores pairs of integers for backtracking to another
333b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho        // outbound edge of a branch node.
334b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho        // The first integer is an offset from bytes_.
335b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho        // The second integer has the str_->length() from before the node in bits 15..0,
336b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho        // and the remaining branch length in bits 24..16. (Bits 31..25 are unused.)
337b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho        // (We could store the remaining branch length minus 1 in bits 23..16 and not use bits 31..24,
338b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho        // but the code looks more confusing that way.)
339b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho        UVector32 *stack_;
340b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho    };
341b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho
342b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2clairehoprivate:
343b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho    friend class BytesTrieBuilder;
344b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho
345b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho    /**
346b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho     * Constructs a BytesTrie reader instance.
347b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho     * Unlike the public constructor which just aliases an array,
348b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho     * this constructor adopts the builder's array.
349b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho     * This constructor is only called by the builder.
350b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho     */
351b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho    BytesTrie(void *adoptBytes, const void *trieBytes)
35254dcd9b6a06071f647dac967e9e267abb9410720Craig Cornelius            : ownedArray_(static_cast<uint8_t *>(adoptBytes)),
35354dcd9b6a06071f647dac967e9e267abb9410720Craig Cornelius              bytes_(static_cast<const uint8_t *>(trieBytes)),
354b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho              pos_(bytes_), remainingMatchLength_(-1) {}
355b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho
356b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho    // No assignment operator.
357b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho    BytesTrie &operator=(const BytesTrie &other);
358b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho
359b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho    inline void stop() {
360b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho        pos_=NULL;
361b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho    }
362b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho
363b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho    // Reads a compact 32-bit integer.
364b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho    // pos is already after the leadByte, and the lead byte is already shifted right by 1.
365b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho    static int32_t readValue(const uint8_t *pos, int32_t leadByte);
366b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho    static inline const uint8_t *skipValue(const uint8_t *pos, int32_t leadByte) {
367b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho        // U_ASSERT(leadByte>=kMinValueLead);
368b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho        if(leadByte>=(kMinTwoByteValueLead<<1)) {
369b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho            if(leadByte<(kMinThreeByteValueLead<<1)) {
370b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho                ++pos;
371b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho            } else if(leadByte<(kFourByteValueLead<<1)) {
372b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho                pos+=2;
373b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho            } else {
374b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho                pos+=3+((leadByte>>1)&1);
375b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho            }
376b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho        }
377b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho        return pos;
378b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho    }
379b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho    static inline const uint8_t *skipValue(const uint8_t *pos) {
380b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho        int32_t leadByte=*pos++;
381b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho        return skipValue(pos, leadByte);
382b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho    }
383b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho
384b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho    // Reads a jump delta and jumps.
385b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho    static const uint8_t *jumpByDelta(const uint8_t *pos);
386b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho
387b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho    static inline const uint8_t *skipDelta(const uint8_t *pos) {
388b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho        int32_t delta=*pos++;
389b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho        if(delta>=kMinTwoByteDeltaLead) {
390b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho            if(delta<kMinThreeByteDeltaLead) {
391b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho                ++pos;
392b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho            } else if(delta<kFourByteDeltaLead) {
393b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho                pos+=2;
394b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho            } else {
395b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho                pos+=3+(delta&1);
396b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho            }
397b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho        }
398b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho        return pos;
399b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho    }
400b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho
401b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho    static inline UStringTrieResult valueResult(int32_t node) {
402b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho        return (UStringTrieResult)(USTRINGTRIE_INTERMEDIATE_VALUE-(node&kValueIsFinal));
403b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho    }
404b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho
405b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho    // Handles a branch node for both next(byte) and next(string).
406b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho    UStringTrieResult branchNext(const uint8_t *pos, int32_t length, int32_t inByte);
407b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho
408b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho    // Requires remainingLength_<0.
409b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho    UStringTrieResult nextImpl(const uint8_t *pos, int32_t inByte);
410b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho
411b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho    // Helper functions for hasUniqueValue().
412b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho    // Recursively finds a unique value (or whether there is not a unique one)
413b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho    // from a branch.
414b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho    static const uint8_t *findUniqueValueFromBranch(const uint8_t *pos, int32_t length,
415b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho                                                    UBool haveUniqueValue, int32_t &uniqueValue);
416b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho    // Recursively finds a unique value (or whether there is not a unique one)
417b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho    // starting from a position on a node lead byte.
418b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho    static UBool findUniqueValue(const uint8_t *pos, UBool haveUniqueValue, int32_t &uniqueValue);
419b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho
420b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho    // Helper functions for getNextBytes().
421b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho    // getNextBytes() when pos is on a branch node.
422b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho    static void getNextBranchBytes(const uint8_t *pos, int32_t length, ByteSink &out);
423b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho    static void append(ByteSink &out, int c);
424b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho
425b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho    // BytesTrie data structure
426b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho    //
427b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho    // The trie consists of a series of byte-serialized nodes for incremental
428b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho    // string/byte sequence matching. The root node is at the beginning of the trie data.
429b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho    //
430b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho    // Types of nodes are distinguished by their node lead byte ranges.
431b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho    // After each node, except a final-value node, another node follows to
432b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho    // encode match values or continue matching further bytes.
433b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho    //
434b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho    // Node types:
435b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho    //  - Value node: Stores a 32-bit integer in a compact, variable-length format.
436b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho    //    The value is for the string/byte sequence so far.
437b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho    //    One node bit indicates whether the value is final or whether
438b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho    //    matching continues with the next node.
439b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho    //  - Linear-match node: Matches a number of bytes.
440b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho    //  - Branch node: Branches to other nodes according to the current input byte.
441b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho    //    The node byte is the length of the branch (number of bytes to select from)
442b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho    //    minus 1. It is followed by a sub-node:
443b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho    //    - If the length is at most kMaxBranchLinearSubNodeLength, then
444b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho    //      there are length-1 (key, value) pairs and then one more comparison byte.
445b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho    //      If one of the key bytes matches, then the value is either a final value for
446b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho    //      the string/byte sequence so far, or a "jump" delta to the next node.
447b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho    //      If the last byte matches, then matching continues with the next node.
448b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho    //      (Values have the same encoding as value nodes.)
449b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho    //    - If the length is greater than kMaxBranchLinearSubNodeLength, then
450b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho    //      there is one byte and one "jump" delta.
451b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho    //      If the input byte is less than the sub-node byte, then "jump" by delta to
452b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho    //      the next sub-node which will have a length of length/2.
453b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho    //      (The delta has its own compact encoding.)
454b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho    //      Otherwise, skip the "jump" delta to the next sub-node
455b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho    //      which will have a length of length-length/2.
456b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho
457b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho    // Node lead byte values.
458b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho
459b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho    // 00..0f: Branch node. If node!=0 then the length is node+1, otherwise
460b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho    // the length is one more than the next byte.
461b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho
462b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho    // For a branch sub-node with at most this many entries, we drop down
463b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho    // to a linear search.
464b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho    static const int32_t kMaxBranchLinearSubNodeLength=5;
465b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho
466b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho    // 10..1f: Linear-match node, match 1..16 bytes and continue reading the next node.
467b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho    static const int32_t kMinLinearMatch=0x10;
468b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho    static const int32_t kMaxLinearMatchLength=0x10;
469b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho
470b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho    // 20..ff: Variable-length value node.
471b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho    // If odd, the value is final. (Otherwise, intermediate value or jump delta.)
472b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho    // Then shift-right by 1 bit.
473b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho    // The remaining lead byte value indicates the number of following bytes (0..4)
474b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho    // and contains the value's top bits.
475b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho    static const int32_t kMinValueLead=kMinLinearMatch+kMaxLinearMatchLength;  // 0x20
476b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho    // It is a final value if bit 0 is set.
477b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho    static const int32_t kValueIsFinal=1;
478b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho
479b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho    // Compact value: After testing bit 0, shift right by 1 and then use the following thresholds.
480b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho    static const int32_t kMinOneByteValueLead=kMinValueLead/2;  // 0x10
481b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho    static const int32_t kMaxOneByteValue=0x40;  // At least 6 bits in the first byte.
482b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho
483b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho    static const int32_t kMinTwoByteValueLead=kMinOneByteValueLead+kMaxOneByteValue+1;  // 0x51
484b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho    static const int32_t kMaxTwoByteValue=0x1aff;
485b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho
486b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho    static const int32_t kMinThreeByteValueLead=kMinTwoByteValueLead+(kMaxTwoByteValue>>8)+1;  // 0x6c
487b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho    static const int32_t kFourByteValueLead=0x7e;
488b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho
489b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho    // A little more than Unicode code points. (0x11ffff)
490b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho    static const int32_t kMaxThreeByteValue=((kFourByteValueLead-kMinThreeByteValueLead)<<16)-1;
491b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho
492b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho    static const int32_t kFiveByteValueLead=0x7f;
493b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho
494b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho    // Compact delta integers.
495b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho    static const int32_t kMaxOneByteDelta=0xbf;
496b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho    static const int32_t kMinTwoByteDeltaLead=kMaxOneByteDelta+1;  // 0xc0
497b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho    static const int32_t kMinThreeByteDeltaLead=0xf0;
498b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho    static const int32_t kFourByteDeltaLead=0xfe;
499b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho    static const int32_t kFiveByteDeltaLead=0xff;
500b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho
501b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho    static const int32_t kMaxTwoByteDelta=((kMinThreeByteDeltaLead-kMinTwoByteDeltaLead)<<8)-1;  // 0x2fff
502b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho    static const int32_t kMaxThreeByteDelta=((kFourByteDeltaLead-kMinThreeByteDeltaLead)<<16)-1;  // 0xdffff
503b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho
504b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho    uint8_t *ownedArray_;
505b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho
506b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho    // Fixed value referencing the BytesTrie bytes.
507b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho    const uint8_t *bytes_;
508b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho
509b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho    // Iterator variables.
510b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho
511b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho    // Pointer to next trie byte to read. NULL if no more matches.
512b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho    const uint8_t *pos_;
513b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho    // Remaining length of a linear-match node, minus 1. Negative if not in such a node.
514b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho    int32_t remainingMatchLength_;
515b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho};
516b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho
517b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2clairehoU_NAMESPACE_END
518b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho
519b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho#endif  // __BYTESTRIE_H__
520