/*
*******************************************************************************
*   Copyright (C) 2010-2013, International Business Machines
*   Corporation and others.  All Rights Reserved.
*******************************************************************************
*   file name:  bytestriebuilder.h
*   encoding:   US-ASCII
*   tab size:   8 (not used)
*   indentation:4
*
*   created on: 2010sep25
*   created by: Markus W. Scherer
*/
14
/**
 * \file
 * \brief C++ API: Builder for icu::BytesTrie
 */
19
20#ifndef __BYTESTRIEBUILDER_H__
21#define __BYTESTRIEBUILDER_H__
22
23#include "unicode/utypes.h"
24#include "unicode/bytestrie.h"
25#include "unicode/stringpiece.h"
26#include "unicode/stringtriebuilder.h"
27
28U_NAMESPACE_BEGIN
29
30class BytesTrieElement;
31class CharString;
32
33/**
34 * Builder class for BytesTrie.
35 *
36 * This class is not intended for public subclassing.
37 * @stable ICU 4.8
38 */
39class U_COMMON_API BytesTrieBuilder : public StringTrieBuilder {
40public:
41    /**
42     * Constructs an empty builder.
43     * @param errorCode Standard ICU error code.
44     * @stable ICU 4.8
45     */
46    BytesTrieBuilder(UErrorCode &errorCode);
47
48    /**
49     * Destructor.
50     * @stable ICU 4.8
51     */
52    virtual ~BytesTrieBuilder();
53
54    /**
55     * Adds a (byte sequence, value) pair.
56     * The byte sequence must be unique.
57     * The bytes will be copied; the builder does not keep
58     * a reference to the input StringPiece or its data().
59     * @param s The input byte sequence.
60     * @param value The value associated with this byte sequence.
61     * @param errorCode Standard ICU error code. Its input value must
62     *                  pass the U_SUCCESS() test, or else the function returns
63     *                  immediately. Check for U_FAILURE() on output or use with
64     *                  function chaining. (See User Guide for details.)
65     * @return *this
66     * @stable ICU 4.8
67     */
68    BytesTrieBuilder &add(const StringPiece &s, int32_t value, UErrorCode &errorCode);
69
70    /**
71     * Builds a BytesTrie for the add()ed data.
72     * Once built, no further data can be add()ed until clear() is called.
73     *
74     * A BytesTrie cannot be empty. At least one (byte sequence, value) pair
75     * must have been add()ed.
76     *
77     * This method passes ownership of the builder's internal result array to the new trie object.
78     * Another call to any build() variant will re-serialize the trie.
79     * After clear() has been called, a new array will be used as well.
80     * @param buildOption Build option, see UStringTrieBuildOption.
81     * @param errorCode Standard ICU error code. Its input value must
82     *                  pass the U_SUCCESS() test, or else the function returns
83     *                  immediately. Check for U_FAILURE() on output or use with
84     *                  function chaining. (See User Guide for details.)
85     * @return A new BytesTrie for the add()ed data.
86     * @stable ICU 4.8
87     */
88    BytesTrie *build(UStringTrieBuildOption buildOption, UErrorCode &errorCode);
89
90    /**
91     * Builds a BytesTrie for the add()ed data and byte-serializes it.
92     * Once built, no further data can be add()ed until clear() is called.
93     *
94     * A BytesTrie cannot be empty. At least one (byte sequence, value) pair
95     * must have been add()ed.
96     *
97     * Multiple calls to buildStringPiece() return StringPieces referring to the
98     * builder's same byte array, without rebuilding.
99     * If buildStringPiece() is called after build(), the trie will be
100     * re-serialized into a new array.
101     * If build() is called after buildStringPiece(), the trie object will become
102     * the owner of the previously returned array.
103     * After clear() has been called, a new array will be used as well.
104     * @param buildOption Build option, see UStringTrieBuildOption.
105     * @param errorCode Standard ICU error code. Its input value must
106     *                  pass the U_SUCCESS() test, or else the function returns
107     *                  immediately. Check for U_FAILURE() on output or use with
108     *                  function chaining. (See User Guide for details.)
109     * @return A StringPiece which refers to the byte-serialized BytesTrie for the add()ed data.
110     * @stable ICU 4.8
111     */
112    StringPiece buildStringPiece(UStringTrieBuildOption buildOption, UErrorCode &errorCode);
113
114    /**
115     * Removes all (byte sequence, value) pairs.
116     * New data can then be add()ed and a new trie can be built.
117     * @return *this
118     * @stable ICU 4.8
119     */
120    BytesTrieBuilder &clear();
121
122private:
123    BytesTrieBuilder(const BytesTrieBuilder &other);  // no copy constructor
124    BytesTrieBuilder &operator=(const BytesTrieBuilder &other);  // no assignment operator
125
126    void buildBytes(UStringTrieBuildOption buildOption, UErrorCode &errorCode);
127
128    virtual int32_t getElementStringLength(int32_t i) const;
129    virtual UChar getElementUnit(int32_t i, int32_t byteIndex) const;
130    virtual int32_t getElementValue(int32_t i) const;
131
132    virtual int32_t getLimitOfLinearMatch(int32_t first, int32_t last, int32_t byteIndex) const;
133
134    virtual int32_t countElementUnits(int32_t start, int32_t limit, int32_t byteIndex) const;
135    virtual int32_t skipElementsBySomeUnits(int32_t i, int32_t byteIndex, int32_t count) const;
136    virtual int32_t indexOfElementWithNextUnit(int32_t i, int32_t byteIndex, UChar byte) const;
137
138    virtual UBool matchNodesCanHaveValues() const { return FALSE; }
139
140    virtual int32_t getMaxBranchLinearSubNodeLength() const { return BytesTrie::kMaxBranchLinearSubNodeLength; }
141    virtual int32_t getMinLinearMatch() const { return BytesTrie::kMinLinearMatch; }
142    virtual int32_t getMaxLinearMatchLength() const { return BytesTrie::kMaxLinearMatchLength; }
143
144#ifndef U_HIDE_INTERNAL_API
145    /**
146     * @internal
147     */
148    class BTLinearMatchNode : public LinearMatchNode {
149    public:
150        BTLinearMatchNode(const char *units, int32_t len, Node *nextNode);
151        virtual UBool operator==(const Node &other) const;
152        virtual void write(StringTrieBuilder &builder);
153    private:
154        const char *s;
155    };
156#endif  /* U_HIDE_INTERNAL_API */
157
158    virtual Node *createLinearMatchNode(int32_t i, int32_t byteIndex, int32_t length,
159                                        Node *nextNode) const;
160
161    UBool ensureCapacity(int32_t length);
162    virtual int32_t write(int32_t byte);
163    int32_t write(const char *b, int32_t length);
164    virtual int32_t writeElementUnits(int32_t i, int32_t byteIndex, int32_t length);
165    virtual int32_t writeValueAndFinal(int32_t i, UBool isFinal);
166    virtual int32_t writeValueAndType(UBool hasValue, int32_t value, int32_t node);
167    virtual int32_t writeDeltaTo(int32_t jumpTarget);
168
169    CharString *strings;  // Pointer not object so we need not #include internal charstr.h.
170    BytesTrieElement *elements;
171    int32_t elementsCapacity;
172    int32_t elementsLength;
173
174    // Byte serialization of the trie.
175    // Grows from the back: bytesLength measures from the end of the buffer!
176    char *bytes;
177    int32_t bytesCapacity;
178    int32_t bytesLength;
179};
180
181U_NAMESPACE_END
182
183#endif  // __BYTESTRIEBUILDER_H__
184