// Copyright (C) 2016 and later: Unicode, Inc. and others.
// License & terms of use: http://www.unicode.org/copyright.html
/*
*******************************************************************************
*   Copyright (C) 2010-2016, International Business Machines
*   Corporation and others.  All Rights Reserved.
*******************************************************************************
*   file name:  bytestriebuilder.h
*   encoding:   US-ASCII
*   tab size:   8 (not used)
*   indentation:4
*
*   created on: 2010sep25
*   created by: Markus W. Scherer
*/

/**
 * \file
 * \brief C++ API: Builder for icu::BytesTrie
 */
21
22#ifndef __BYTESTRIEBUILDER_H__
23#define __BYTESTRIEBUILDER_H__
24
25#include "unicode/utypes.h"
26#include "unicode/bytestrie.h"
27#include "unicode/stringpiece.h"
28#include "unicode/stringtriebuilder.h"
29
30U_NAMESPACE_BEGIN
31
32class BytesTrieElement;
33class CharString;
34/**
35 * Builder class for BytesTrie.
36 *
37 * This class is not intended for public subclassing.
38 * @stable ICU 4.8
39 */
40class U_COMMON_API BytesTrieBuilder : public StringTrieBuilder {
41public:
42    /**
43     * Constructs an empty builder.
44     * @param errorCode Standard ICU error code.
45     * @stable ICU 4.8
46     */
47    BytesTrieBuilder(UErrorCode &errorCode);
48
49    /**
50     * Destructor.
51     * @stable ICU 4.8
52     */
53    virtual ~BytesTrieBuilder();
54
55    /**
56     * Adds a (byte sequence, value) pair.
57     * The byte sequence must be unique.
58     * The bytes will be copied; the builder does not keep
59     * a reference to the input StringPiece or its data().
60     * @param s The input byte sequence.
61     * @param value The value associated with this byte sequence.
62     * @param errorCode Standard ICU error code. Its input value must
63     *                  pass the U_SUCCESS() test, or else the function returns
64     *                  immediately. Check for U_FAILURE() on output or use with
65     *                  function chaining. (See User Guide for details.)
66     * @return *this
67     * @stable ICU 4.8
68     */
69    BytesTrieBuilder &add(StringPiece s, int32_t value, UErrorCode &errorCode);
70
71    /**
72     * Builds a BytesTrie for the add()ed data.
73     * Once built, no further data can be add()ed until clear() is called.
74     *
75     * A BytesTrie cannot be empty. At least one (byte sequence, value) pair
76     * must have been add()ed.
77     *
78     * This method passes ownership of the builder's internal result array to the new trie object.
79     * Another call to any build() variant will re-serialize the trie.
80     * After clear() has been called, a new array will be used as well.
81     * @param buildOption Build option, see UStringTrieBuildOption.
82     * @param errorCode Standard ICU error code. Its input value must
83     *                  pass the U_SUCCESS() test, or else the function returns
84     *                  immediately. Check for U_FAILURE() on output or use with
85     *                  function chaining. (See User Guide for details.)
86     * @return A new BytesTrie for the add()ed data.
87     * @stable ICU 4.8
88     */
89    BytesTrie *build(UStringTrieBuildOption buildOption, UErrorCode &errorCode);
90
91    /**
92     * Builds a BytesTrie for the add()ed data and byte-serializes it.
93     * Once built, no further data can be add()ed until clear() is called.
94     *
95     * A BytesTrie cannot be empty. At least one (byte sequence, value) pair
96     * must have been add()ed.
97     *
98     * Multiple calls to buildStringPiece() return StringPieces referring to the
99     * builder's same byte array, without rebuilding.
100     * If buildStringPiece() is called after build(), the trie will be
101     * re-serialized into a new array.
102     * If build() is called after buildStringPiece(), the trie object will become
103     * the owner of the previously returned array.
104     * After clear() has been called, a new array will be used as well.
105     * @param buildOption Build option, see UStringTrieBuildOption.
106     * @param errorCode Standard ICU error code. Its input value must
107     *                  pass the U_SUCCESS() test, or else the function returns
108     *                  immediately. Check for U_FAILURE() on output or use with
109     *                  function chaining. (See User Guide for details.)
110     * @return A StringPiece which refers to the byte-serialized BytesTrie for the add()ed data.
111     * @stable ICU 4.8
112     */
113    StringPiece buildStringPiece(UStringTrieBuildOption buildOption, UErrorCode &errorCode);
114
115    /**
116     * Removes all (byte sequence, value) pairs.
117     * New data can then be add()ed and a new trie can be built.
118     * @return *this
119     * @stable ICU 4.8
120     */
121    BytesTrieBuilder &clear();
122
123private:
124    BytesTrieBuilder(const BytesTrieBuilder &other);  // no copy constructor
125    BytesTrieBuilder &operator=(const BytesTrieBuilder &other);  // no assignment operator
126
127    void buildBytes(UStringTrieBuildOption buildOption, UErrorCode &errorCode);
128
129    virtual int32_t getElementStringLength(int32_t i) const;
130    virtual UChar getElementUnit(int32_t i, int32_t byteIndex) const;
131    virtual int32_t getElementValue(int32_t i) const;
132
133    virtual int32_t getLimitOfLinearMatch(int32_t first, int32_t last, int32_t byteIndex) const;
134
135    virtual int32_t countElementUnits(int32_t start, int32_t limit, int32_t byteIndex) const;
136    virtual int32_t skipElementsBySomeUnits(int32_t i, int32_t byteIndex, int32_t count) const;
137    virtual int32_t indexOfElementWithNextUnit(int32_t i, int32_t byteIndex, UChar byte) const;
138
139    virtual UBool matchNodesCanHaveValues() const { return FALSE; }
140
141    virtual int32_t getMaxBranchLinearSubNodeLength() const { return BytesTrie::kMaxBranchLinearSubNodeLength; }
142    virtual int32_t getMinLinearMatch() const { return BytesTrie::kMinLinearMatch; }
143    virtual int32_t getMaxLinearMatchLength() const { return BytesTrie::kMaxLinearMatchLength; }
144
145    /**
146     * @internal
147     */
148    class BTLinearMatchNode : public LinearMatchNode {
149    public:
150        BTLinearMatchNode(const char *units, int32_t len, Node *nextNode);
151        virtual UBool operator==(const Node &other) const;
152        virtual void write(StringTrieBuilder &builder);
153    private:
154        const char *s;
155    };
156
157    // don't use #ifndef U_HIDE_INTERNAL_API with private class members or virtual methods.
158    virtual Node *createLinearMatchNode(int32_t i, int32_t byteIndex, int32_t length,
159                                        Node *nextNode) const;
160
161    UBool ensureCapacity(int32_t length);
162    virtual int32_t write(int32_t byte);
163    int32_t write(const char *b, int32_t length);
164    virtual int32_t writeElementUnits(int32_t i, int32_t byteIndex, int32_t length);
165    virtual int32_t writeValueAndFinal(int32_t i, UBool isFinal);
166    virtual int32_t writeValueAndType(UBool hasValue, int32_t value, int32_t node);
167    virtual int32_t writeDeltaTo(int32_t jumpTarget);
168
169    CharString *strings;  // Pointer not object so we need not #include internal charstr.h.
170    BytesTrieElement *elements;
171    int32_t elementsCapacity;
172    int32_t elementsLength;
173
174    // Byte serialization of the trie.
175    // Grows from the back: bytesLength measures from the end of the buffer!
176    char *bytes;
177    int32_t bytesCapacity;
178    int32_t bytesLength;
179};
180
181U_NAMESPACE_END
182
183#endif  // __BYTESTRIEBUILDER_H__
184