1b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho/*
2b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho*******************************************************************************
3b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho*   Copyright (C) 2010-2011, International Business Machines
4b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho*   Corporation and others.  All Rights Reserved.
5b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho*******************************************************************************
6b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho*   file name:  stringtriebuilder.h
7b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho*   encoding:   US-ASCII
8b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho*   tab size:   8 (not used)
9b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho*   indentation:4
10b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho*
11b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho*   created on: 2010dec24
12b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho*   created by: Markus W. Scherer
13b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho*/
14b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho
15b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho#ifndef __STRINGTRIEBUILDER_H__
16b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho#define __STRINGTRIEBUILDER_H__
17b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho
18b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho#include "unicode/utypes.h"
19b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho#include "unicode/uobject.h"
20b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho
21b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho// Forward declaration.
22b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2clairehostruct UHashtable;
23b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2clairehotypedef struct UHashtable UHashtable;
24b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho
25b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho/**
26b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho * Build options for BytesTrieBuilder and CharsTrieBuilder.
27b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho * @draft ICU 4.8
28b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho */
29b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2clairehoenum UStringTrieBuildOption {
30b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho    /**
31b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho     * Builds a trie quickly.
32b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho     * @draft ICU 4.8
33b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho     */
34b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho    USTRINGTRIE_BUILD_FAST,
35b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho    /**
36b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho     * Builds a trie more slowly, attempting to generate
37b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho     * a shorter but equivalent serialization.
38b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho     * This build option also uses more memory.
39b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho     *
40b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho     * This option can be effective when many integer values are the same
41b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho     * and string/byte sequence suffixes can be shared.
42b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho     * Runtime speed is not expected to improve.
43b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho     * @draft ICU 4.8
44b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho     */
45b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho    USTRINGTRIE_BUILD_SMALL
46b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho};
47b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho
48b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2clairehoU_NAMESPACE_BEGIN
49b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho
50b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho/**
51b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho * Base class for string trie builder classes.
52b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho *
53b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho * This class is not intended for public subclassing.
54b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho * @draft ICU 4.8
55b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho */
56b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2clairehoclass U_COMMON_API StringTrieBuilder : public UObject {
57b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2clairehopublic:
58b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho    /** @internal */
59b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho    static UBool hashNode(const void *node);
60b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho    /** @internal */
61b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho    static UBool equalNodes(const void *left, const void *right);
62b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho
63b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2clairehoprotected:
64b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho    /** @internal */
65b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho    StringTrieBuilder();
66b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho    /** @internal */
67b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho    virtual ~StringTrieBuilder();
68b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho
69b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho    /** @internal */
70b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho    void createCompactBuilder(int32_t sizeGuess, UErrorCode &errorCode);
71b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho    /** @internal */
72b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho    void deleteCompactBuilder();
73b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho
74b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho    /** @internal */
75b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho    void build(UStringTrieBuildOption buildOption, int32_t elementsLength, UErrorCode &errorCode);
76b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho
77b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho    /** @internal */
78b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho    int32_t writeNode(int32_t start, int32_t limit, int32_t unitIndex);
79b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho    /** @internal */
80b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho    int32_t writeBranchSubNode(int32_t start, int32_t limit, int32_t unitIndex, int32_t length);
81b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho
82b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho    class Node;
83b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho
84b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho    /** @internal */
85b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho    Node *makeNode(int32_t start, int32_t limit, int32_t unitIndex, UErrorCode &errorCode);
86b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho    /** @internal */
87b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho    Node *makeBranchSubNode(int32_t start, int32_t limit, int32_t unitIndex,
88b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho                            int32_t length, UErrorCode &errorCode);
89b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho
90b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho    /** @internal */
91b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho    virtual int32_t getElementStringLength(int32_t i) const = 0;
92b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho    /** @internal */
93b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho    virtual UChar getElementUnit(int32_t i, int32_t unitIndex) const = 0;
94b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho    /** @internal */
95b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho    virtual int32_t getElementValue(int32_t i) const = 0;
96b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho
97b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho    // Finds the first unit index after this one where
98b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho    // the first and last element have different units again.
99b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho    /** @internal */
100b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho    virtual int32_t getLimitOfLinearMatch(int32_t first, int32_t last, int32_t unitIndex) const = 0;
101b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho
102b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho    // Number of different units at unitIndex.
103b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho    /** @internal */
104b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho    virtual int32_t countElementUnits(int32_t start, int32_t limit, int32_t unitIndex) const = 0;
105b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho    /** @internal */
106b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho    virtual int32_t skipElementsBySomeUnits(int32_t i, int32_t unitIndex, int32_t count) const = 0;
107b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho    /** @internal */
108b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho    virtual int32_t indexOfElementWithNextUnit(int32_t i, int32_t unitIndex, UChar unit) const = 0;
109b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho
110b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho    /** @internal */
111b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho    virtual UBool matchNodesCanHaveValues() const = 0;
112b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho
113b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho    /** @internal */
114b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho    virtual int32_t getMaxBranchLinearSubNodeLength() const = 0;
115b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho    /** @internal */
116b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho    virtual int32_t getMinLinearMatch() const = 0;
117b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho    /** @internal */
118b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho    virtual int32_t getMaxLinearMatchLength() const = 0;
119b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho
120b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho    // max(BytesTrie::kMaxBranchLinearSubNodeLength, UCharsTrie::kMaxBranchLinearSubNodeLength).
121b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho    /** @internal */
122b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho    static const int32_t kMaxBranchLinearSubNodeLength=5;
123b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho
124b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho    // Maximum number of nested split-branch levels for a branch on all 2^16 possible UChar units.
125b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho    // log2(2^16/kMaxBranchLinearSubNodeLength) rounded up.
126b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho    /** @internal */
127b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho    static const int32_t kMaxSplitBranchLevels=14;
128b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho
129b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho    /**
130b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho     * Makes sure that there is only one unique node registered that is
131b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho     * equivalent to newNode.
132b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho     * @param newNode Input node. The builder takes ownership.
133b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho     * @param errorCode ICU in/out UErrorCode.
134b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho                        Set to U_MEMORY_ALLOCATION_ERROR if it was success but newNode==NULL.
135b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho     * @return newNode if it is the first of its kind, or
136b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho     *         an equivalent node if newNode is a duplicate.
137b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho     * @internal
138b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho     */
139b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho    Node *registerNode(Node *newNode, UErrorCode &errorCode);
140b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho    /**
141b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho     * Makes sure that there is only one unique FinalValueNode registered
142b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho     * with this value.
143b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho     * Avoids creating a node if the value is a duplicate.
144b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho     * @param value A final value.
145b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho     * @param errorCode ICU in/out UErrorCode.
146b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho                        Set to U_MEMORY_ALLOCATION_ERROR if it was success but newNode==NULL.
147b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho     * @return A FinalValueNode with the given value.
148b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho     * @internal
149b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho     */
150b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho    Node *registerFinalValue(int32_t value, UErrorCode &errorCode);
151b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho
152b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho    /*
153b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho     * C++ note:
154b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho     * registerNode() and registerFinalValue() take ownership of their input nodes,
155b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho     * and only return owned nodes.
156b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho     * If they see a failure UErrorCode, they will delete the input node.
157b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho     * If they get a NULL pointer, they will record a U_MEMORY_ALLOCATION_ERROR.
158b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho     * If there is a failure, they return NULL.
159b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho     *
160b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho     * NULL Node pointers can be safely passed into other Nodes because
161b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho     * they call the static Node::hashCode() which checks for a NULL pointer first.
162b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho     *
163b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho     * Therefore, as long as builder functions register a new node,
164b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho     * they need to check for failures only before explicitly dereferencing
165b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho     * a Node pointer, or before setting a new UErrorCode.
166b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho     */
167b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho
168b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho    // Hash set of nodes, maps from nodes to integer 1.
169b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho    /** @internal */
170b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho    UHashtable *nodes;
171b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho
172b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho    /** @internal */
173b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho    class Node : public UObject {
174b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho    public:
175b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho        Node(int32_t initialHash) : hash(initialHash), offset(0) {}
176b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho        inline int32_t hashCode() const { return hash; }
177b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho        // Handles node==NULL.
178b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho        static inline int32_t hashCode(const Node *node) { return node==NULL ? 0 : node->hashCode(); }
179b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho        // Base class operator==() compares the actual class types.
180b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho        virtual UBool operator==(const Node &other) const;
181b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho        inline UBool operator!=(const Node &other) const { return !operator==(other); }
182b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho        /**
183b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho         * Traverses the Node graph and numbers branch edges, with rightmost edges first.
184b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho         * This is to avoid writing a duplicate node twice.
185b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho         *
186b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho         * Branch nodes in this trie data structure are not symmetric.
187b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho         * Most branch edges "jump" to other nodes but the rightmost branch edges
188b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho         * just continue without a jump.
189b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho         * Therefore, write() must write the rightmost branch edge last
190b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho         * (trie units are written backwards), and must write it at that point even if
191b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho         * it is a duplicate of a node previously written elsewhere.
192b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho         *
193b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho         * This function visits and marks right branch edges first.
194b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho         * Edges are numbered with increasingly negative values because we share the
195b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho         * offset field which gets positive values when nodes are written.
196b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho         * A branch edge also remembers the first number for any of its edges.
197b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho         *
198b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho         * When a further-left branch edge has a number in the range of the rightmost
199b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho         * edge's numbers, then it will be written as part of the required right edge
200b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho         * and we can avoid writing it first.
201b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho         *
202b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho         * After root.markRightEdgesFirst(-1) the offsets of all nodes are negative
203b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho         * edge numbers.
204b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho         *
205b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho         * @param edgeNumber The first edge number for this node and its sub-nodes.
206b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho         * @return An edge number that is at least the maximum-negative
207b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho         *         of the input edge number and the numbers of this node and all of its sub-nodes.
208b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho         */
209b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho        virtual int32_t markRightEdgesFirst(int32_t edgeNumber);
210b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho        // write() must set the offset to a positive value.
211b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho        virtual void write(StringTrieBuilder &builder) = 0;
212b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho        // See markRightEdgesFirst.
213b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho        inline void writeUnlessInsideRightEdge(int32_t firstRight, int32_t lastRight,
214b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho                                               StringTrieBuilder &builder) {
215b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho            // Note: Edge numbers are negative, lastRight<=firstRight.
216b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho            // If offset>0 then this node and its sub-nodes have been written already
217b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho            // and we need not write them again.
218b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho            // If this node is part of the unwritten right branch edge,
219b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho            // then we wait until that is written.
220b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho            if(offset<0 && (offset<lastRight || firstRight<offset)) {
221b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho                write(builder);
222b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho            }
223b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho        }
224b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho        inline int32_t getOffset() const { return offset; }
225b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho    protected:
226b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho        int32_t hash;
227b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho        int32_t offset;
228b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho    private:
229b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho        // No ICU "poor man's RTTI" for this class nor its subclasses.
230b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho        virtual UClassID getDynamicClassID() const;
231b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho    };
232b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho
233b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho    // This class should not be overridden because
234b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho    // registerFinalValue() compares a stack-allocated FinalValueNode
235b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho    // (stack-allocated so that we don't unnecessarily create lots of duplicate nodes)
236b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho    // with the input node, and the
237b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho    // !Node::operator==(other) used inside FinalValueNode::operator==(other)
238b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho    // will be false if the typeid's are different.
239b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho    /** @internal */
240b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho    class FinalValueNode : public Node {
241b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho    public:
242b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho        FinalValueNode(int32_t v) : Node(0x111111*37+v), value(v) {}
243b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho        virtual UBool operator==(const Node &other) const;
244b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho        virtual void write(StringTrieBuilder &builder);
245b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho    protected:
246b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho        int32_t value;
247b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho    };
248b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho
249b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho    /** @internal */
250b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho    class ValueNode : public Node {
251b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho    public:
252b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho        ValueNode(int32_t initialHash) : Node(initialHash), hasValue(FALSE), value(0) {}
253b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho        virtual UBool operator==(const Node &other) const;
254b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho        void setValue(int32_t v) {
255b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho            hasValue=TRUE;
256b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho            value=v;
257b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho            hash=hash*37+v;
258b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho        }
259b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho    protected:
260b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho        UBool hasValue;
261b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho        int32_t value;
262b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho    };
263b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho
264b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho    /** @internal */
265b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho    class IntermediateValueNode : public ValueNode {
266b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho    public:
267b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho        IntermediateValueNode(int32_t v, Node *nextNode)
268b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho                : ValueNode(0x222222*37+hashCode(nextNode)), next(nextNode) { setValue(v); }
269b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho        virtual UBool operator==(const Node &other) const;
270b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho        virtual int32_t markRightEdgesFirst(int32_t edgeNumber);
271b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho        virtual void write(StringTrieBuilder &builder);
272b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho    protected:
273b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho        Node *next;
274b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho    };
275b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho
276b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho    /** @internal */
277b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho    class LinearMatchNode : public ValueNode {
278b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho    public:
279b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho        LinearMatchNode(int32_t len, Node *nextNode)
280b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho                : ValueNode((0x333333*37+len)*37+hashCode(nextNode)),
281b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho                  length(len), next(nextNode) {}
282b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho        virtual UBool operator==(const Node &other) const;
283b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho        virtual int32_t markRightEdgesFirst(int32_t edgeNumber);
284b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho    protected:
285b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho        int32_t length;
286b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho        Node *next;
287b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho    };
288b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho
289b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho    /** @internal */
290b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho    class BranchNode : public Node {
291b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho    public:
292b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho        BranchNode(int32_t initialHash) : Node(initialHash) {}
293b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho    protected:
294b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho        int32_t firstEdgeNumber;
295b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho    };
296b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho
297b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho    /** @internal */
298b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho    class ListBranchNode : public BranchNode {
299b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho    public:
300b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho        ListBranchNode() : BranchNode(0x444444), length(0) {}
301b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho        virtual UBool operator==(const Node &other) const;
302b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho        virtual int32_t markRightEdgesFirst(int32_t edgeNumber);
303b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho        virtual void write(StringTrieBuilder &builder);
304b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho        // Adds a unit with a final value.
305b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho        void add(int32_t c, int32_t value) {
306b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho            units[length]=(UChar)c;
307b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho            equal[length]=NULL;
308b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho            values[length]=value;
309b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho            ++length;
310b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho            hash=(hash*37+c)*37+value;
311b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho        }
312b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho        // Adds a unit which leads to another match node.
313b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho        void add(int32_t c, Node *node) {
314b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho            units[length]=(UChar)c;
315b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho            equal[length]=node;
316b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho            values[length]=0;
317b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho            ++length;
318b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho            hash=(hash*37+c)*37+hashCode(node);
319b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho        }
320b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho    protected:
321b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho        Node *equal[kMaxBranchLinearSubNodeLength];  // NULL means "has final value".
322b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho        int32_t length;
323b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho        int32_t values[kMaxBranchLinearSubNodeLength];
324b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho        UChar units[kMaxBranchLinearSubNodeLength];
325b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho    };
326b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho
327b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho    /** @internal */
328b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho    class SplitBranchNode : public BranchNode {
329b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho    public:
330b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho        SplitBranchNode(UChar middleUnit, Node *lessThanNode, Node *greaterOrEqualNode)
331b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho                : BranchNode(((0x555555*37+middleUnit)*37+
332b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho                              hashCode(lessThanNode))*37+hashCode(greaterOrEqualNode)),
333b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho                  unit(middleUnit), lessThan(lessThanNode), greaterOrEqual(greaterOrEqualNode) {}
334b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho        virtual UBool operator==(const Node &other) const;
335b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho        virtual int32_t markRightEdgesFirst(int32_t edgeNumber);
336b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho        virtual void write(StringTrieBuilder &builder);
337b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho    protected:
338b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho        UChar unit;
339b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho        Node *lessThan;
340b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho        Node *greaterOrEqual;
341b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho    };
342b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho
343b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho    // Branch head node, for writing the actual node lead unit.
344b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho    /** @internal */
345b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho    class BranchHeadNode : public ValueNode {
346b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho    public:
347b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho        BranchHeadNode(int32_t len, Node *subNode)
348b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho                : ValueNode((0x666666*37+len)*37+hashCode(subNode)),
349b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho                  length(len), next(subNode) {}
350b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho        virtual UBool operator==(const Node &other) const;
351b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho        virtual int32_t markRightEdgesFirst(int32_t edgeNumber);
352b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho        virtual void write(StringTrieBuilder &builder);
353b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho    protected:
354b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho        int32_t length;
355b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho        Node *next;  // A branch sub-node.
356b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho    };
357b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho
358b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho    /** @internal */
359b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho    virtual Node *createLinearMatchNode(int32_t i, int32_t unitIndex, int32_t length,
360b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho                                        Node *nextNode) const = 0;
361b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho
362b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho    /** @internal */
363b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho    virtual int32_t write(int32_t unit) = 0;
364b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho    /** @internal */
365b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho    virtual int32_t writeElementUnits(int32_t i, int32_t unitIndex, int32_t length) = 0;
366b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho    /** @internal */
367b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho    virtual int32_t writeValueAndFinal(int32_t i, UBool isFinal) = 0;
368b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho    /** @internal */
369b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho    virtual int32_t writeValueAndType(UBool hasValue, int32_t value, int32_t node) = 0;
370b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho    /** @internal */
371b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho    virtual int32_t writeDeltaTo(int32_t jumpTarget) = 0;
372b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho
373b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2clairehoprivate:
374b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho    // No ICU "poor man's RTTI" for this class nor its subclasses.
375b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho    virtual UClassID getDynamicClassID() const;
376b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho};
377b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho
378b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2clairehoU_NAMESPACE_END
379b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho
380b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho#endif  // __STRINGTRIEBUILDER_H__
381