1/* 2******************************************************************************* 3* Copyright (C) 2010-2013, International Business Machines 4* Corporation and others. All Rights Reserved. 5******************************************************************************* 6* file name: bytestriebuilder.h 7* encoding: US-ASCII 8* tab size: 8 (not used) 9* indentation:4 10* 11* created on: 2010sep25 12* created by: Markus W. Scherer 13*/ 14 15/** 16 * \file 17 * \brief C++ API: Builder for icu::BytesTrie 18 */ 19 20#ifndef __BYTESTRIEBUILDER_H__ 21#define __BYTESTRIEBUILDER_H__ 22 23#include "unicode/utypes.h" 24#include "unicode/bytestrie.h" 25#include "unicode/stringpiece.h" 26#include "unicode/stringtriebuilder.h" 27 28U_NAMESPACE_BEGIN 29 30class BytesTrieElement; 31class CharString; 32 33/** 34 * Builder class for BytesTrie. 35 * 36 * This class is not intended for public subclassing. 37 * @stable ICU 4.8 38 */ 39class U_COMMON_API BytesTrieBuilder : public StringTrieBuilder { 40public: 41 /** 42 * Constructs an empty builder. 43 * @param errorCode Standard ICU error code. 44 * @stable ICU 4.8 45 */ 46 BytesTrieBuilder(UErrorCode &errorCode); 47 48 /** 49 * Destructor. 50 * @stable ICU 4.8 51 */ 52 virtual ~BytesTrieBuilder(); 53 54 /** 55 * Adds a (byte sequence, value) pair. 56 * The byte sequence must be unique. 57 * The bytes will be copied; the builder does not keep 58 * a reference to the input StringPiece or its data(). 59 * @param s The input byte sequence. 60 * @param value The value associated with this byte sequence. 61 * @param errorCode Standard ICU error code. Its input value must 62 * pass the U_SUCCESS() test, or else the function returns 63 * immediately. Check for U_FAILURE() on output or use with 64 * function chaining. (See User Guide for details.) 65 * @return *this 66 * @stable ICU 4.8 67 */ 68 BytesTrieBuilder &add(const StringPiece &s, int32_t value, UErrorCode &errorCode); 69 70 /** 71 * Builds a BytesTrie for the add()ed data. 72 * Once built, no further data can be add()ed until clear() is called. 73 * 74 * A BytesTrie cannot be empty. At least one (byte sequence, value) pair 75 * must have been add()ed. 76 * 77 * This method passes ownership of the builder's internal result array to the new trie object. 78 * Another call to any build() variant will re-serialize the trie. 79 * After clear() has been called, a new array will be used as well. 80 * @param buildOption Build option, see UStringTrieBuildOption. 81 * @param errorCode Standard ICU error code. Its input value must 82 * pass the U_SUCCESS() test, or else the function returns 83 * immediately. Check for U_FAILURE() on output or use with 84 * function chaining. (See User Guide for details.) 85 * @return A new BytesTrie for the add()ed data. 86 * @stable ICU 4.8 87 */ 88 BytesTrie *build(UStringTrieBuildOption buildOption, UErrorCode &errorCode); 89 90 /** 91 * Builds a BytesTrie for the add()ed data and byte-serializes it. 92 * Once built, no further data can be add()ed until clear() is called. 93 * 94 * A BytesTrie cannot be empty. At least one (byte sequence, value) pair 95 * must have been add()ed. 96 * 97 * Multiple calls to buildStringPiece() return StringPieces referring to the 98 * builder's same byte array, without rebuilding. 99 * If buildStringPiece() is called after build(), the trie will be 100 * re-serialized into a new array. 101 * If build() is called after buildStringPiece(), the trie object will become 102 * the owner of the previously returned array. 103 * After clear() has been called, a new array will be used as well. 104 * @param buildOption Build option, see UStringTrieBuildOption. 105 * @param errorCode Standard ICU error code. Its input value must 106 * pass the U_SUCCESS() test, or else the function returns 107 * immediately. Check for U_FAILURE() on output or use with 108 * function chaining. (See User Guide for details.) 109 * @return A StringPiece which refers to the byte-serialized BytesTrie for the add()ed data. 110 * @stable ICU 4.8 111 */ 112 StringPiece buildStringPiece(UStringTrieBuildOption buildOption, UErrorCode &errorCode); 113 114 /** 115 * Removes all (byte sequence, value) pairs. 116 * New data can then be add()ed and a new trie can be built. 117 * @return *this 118 * @stable ICU 4.8 119 */ 120 BytesTrieBuilder &clear(); 121 122private: 123 BytesTrieBuilder(const BytesTrieBuilder &other); // no copy constructor 124 BytesTrieBuilder &operator=(const BytesTrieBuilder &other); // no assignment operator 125 126 void buildBytes(UStringTrieBuildOption buildOption, UErrorCode &errorCode); 127 128 virtual int32_t getElementStringLength(int32_t i) const; 129 virtual UChar getElementUnit(int32_t i, int32_t byteIndex) const; 130 virtual int32_t getElementValue(int32_t i) const; 131 132 virtual int32_t getLimitOfLinearMatch(int32_t first, int32_t last, int32_t byteIndex) const; 133 134 virtual int32_t countElementUnits(int32_t start, int32_t limit, int32_t byteIndex) const; 135 virtual int32_t skipElementsBySomeUnits(int32_t i, int32_t byteIndex, int32_t count) const; 136 virtual int32_t indexOfElementWithNextUnit(int32_t i, int32_t byteIndex, UChar byte) const; 137 138 virtual UBool matchNodesCanHaveValues() const { return FALSE; } 139 140 virtual int32_t getMaxBranchLinearSubNodeLength() const { return BytesTrie::kMaxBranchLinearSubNodeLength; } 141 virtual int32_t getMinLinearMatch() const { return BytesTrie::kMinLinearMatch; } 142 virtual int32_t getMaxLinearMatchLength() const { return BytesTrie::kMaxLinearMatchLength; } 143 144#ifndef U_HIDE_INTERNAL_API 145 /** 146 * @internal 147 */ 148 class BTLinearMatchNode : public LinearMatchNode { 149 public: 150 BTLinearMatchNode(const char *units, int32_t len, Node *nextNode); 151 virtual UBool operator==(const Node &other) const; 152 virtual void write(StringTrieBuilder &builder); 153 private: 154 const char *s; 155 }; 156#endif /* U_HIDE_INTERNAL_API */ 157 158 virtual Node *createLinearMatchNode(int32_t i, int32_t byteIndex, int32_t length, 159 Node *nextNode) const; 160 161 UBool ensureCapacity(int32_t length); 162 virtual int32_t write(int32_t byte); 163 int32_t write(const char *b, int32_t length); 164 virtual int32_t writeElementUnits(int32_t i, int32_t byteIndex, int32_t length); 165 virtual int32_t writeValueAndFinal(int32_t i, UBool isFinal); 166 virtual int32_t writeValueAndType(UBool hasValue, int32_t value, int32_t node); 167 virtual int32_t writeDeltaTo(int32_t jumpTarget); 168 169 CharString *strings; // Pointer not object so we need not #include internal charstr.h. 170 BytesTrieElement *elements; 171 int32_t elementsCapacity; 172 int32_t elementsLength; 173 174 // Byte serialization of the trie. 175 // Grows from the back: bytesLength measures from the end of the buffer! 176 char *bytes; 177 int32_t bytesCapacity; 178 int32_t bytesLength; 179}; 180 181U_NAMESPACE_END 182 183#endif // __BYTESTRIEBUILDER_H__ 184