1/* 2******************************************************************************* 3* Copyright (C) 2010-2011, International Business Machines 4* Corporation and others. All Rights Reserved. 5******************************************************************************* 6* file name: bytestriebuilder.h 7* encoding: US-ASCII 8* tab size: 8 (not used) 9* indentation:4 10* 11* created on: 2010sep25 12* created by: Markus W. Scherer 13*/ 14 15#ifndef __BYTESTRIEBUILDER_H__ 16#define __BYTESTRIEBUILDER_H__ 17 18#include "unicode/utypes.h" 19#include "unicode/bytestrie.h" 20#include "unicode/stringpiece.h" 21#include "unicode/stringtriebuilder.h" 22 23U_NAMESPACE_BEGIN 24 25class BytesTrieElement; 26class CharString; 27 28/** 29 * Builder class for BytesTrie. 30 * 31 * This class is not intended for public subclassing. 32 * @draft ICU 4.8 33 */ 34class U_COMMON_API BytesTrieBuilder : public StringTrieBuilder { 35public: 36 /** 37 * Constructs an empty builder. 38 * @param errorCode Standard ICU error code. 39 * @draft ICU 4.8 40 */ 41 BytesTrieBuilder(UErrorCode &errorCode); 42 43 /** 44 * Destructor. 45 * @draft ICU 4.8 46 */ 47 virtual ~BytesTrieBuilder(); 48 49 /** 50 * Adds a (byte sequence, value) pair. 51 * The byte sequence must be unique. 52 * The bytes will be copied; the builder does not keep 53 * a reference to the input StringPiece or its data(). 54 * @param s The input byte sequence. 55 * @param value The value associated with this byte sequence. 56 * @param errorCode Standard ICU error code. Its input value must 57 * pass the U_SUCCESS() test, or else the function returns 58 * immediately. Check for U_FAILURE() on output or use with 59 * function chaining. (See User Guide for details.) 60 * @return *this 61 * @draft ICU 4.8 62 */ 63 BytesTrieBuilder &add(const StringPiece &s, int32_t value, UErrorCode &errorCode); 64 65 /** 66 * Builds a BytesTrie for the add()ed data. 67 * Once built, no further data can be add()ed until clear() is called. 68 * 69 * This method passes ownership of the builder's internal result array to the new trie object. 70 * Another call to any build() variant will re-serialize the trie. 71 * After clear() has been called, a new array will be used as well. 72 * @param buildOption Build option, see UStringTrieBuildOption. 73 * @param errorCode Standard ICU error code. Its input value must 74 * pass the U_SUCCESS() test, or else the function returns 75 * immediately. Check for U_FAILURE() on output or use with 76 * function chaining. (See User Guide for details.) 77 * @return A new BytesTrie for the add()ed data. 78 * @draft ICU 4.8 79 */ 80 BytesTrie *build(UStringTrieBuildOption buildOption, UErrorCode &errorCode); 81 82 /** 83 * Builds a BytesTrie for the add()ed data and byte-serializes it. 84 * Once built, no further data can be add()ed until clear() is called. 85 * 86 * Multiple calls to buildStringPiece() return StringPieces referring to the 87 * builder's same byte array, without rebuilding. 88 * If buildStringPiece() is called after build(), the trie will be 89 * re-serialized into a new array. 90 * If build() is called after buildStringPiece(), the trie object will become 91 * the owner of the previously returned array. 92 * After clear() has been called, a new array will be used as well. 93 * @param buildOption Build option, see UStringTrieBuildOption. 94 * @param errorCode Standard ICU error code. Its input value must 95 * pass the U_SUCCESS() test, or else the function returns 96 * immediately. Check for U_FAILURE() on output or use with 97 * function chaining. (See User Guide for details.) 98 * @return A StringPiece which refers to the byte-serialized BytesTrie for the add()ed data. 99 * @draft ICU 4.8 100 */ 101 StringPiece buildStringPiece(UStringTrieBuildOption buildOption, UErrorCode &errorCode); 102 103 /** 104 * Removes all (byte sequence, value) pairs. 105 * New data can then be add()ed and a new trie can be built. 106 * @return *this 107 * @draft ICU 4.8 108 */ 109 BytesTrieBuilder &clear(); 110 111private: 112 BytesTrieBuilder(const BytesTrieBuilder &other); // no copy constructor 113 BytesTrieBuilder &operator=(const BytesTrieBuilder &other); // no assignment operator 114 115 void buildBytes(UStringTrieBuildOption buildOption, UErrorCode &errorCode); 116 117 virtual int32_t getElementStringLength(int32_t i) const; 118 virtual UChar getElementUnit(int32_t i, int32_t byteIndex) const; 119 virtual int32_t getElementValue(int32_t i) const; 120 121 virtual int32_t getLimitOfLinearMatch(int32_t first, int32_t last, int32_t byteIndex) const; 122 123 virtual int32_t countElementUnits(int32_t start, int32_t limit, int32_t byteIndex) const; 124 virtual int32_t skipElementsBySomeUnits(int32_t i, int32_t byteIndex, int32_t count) const; 125 virtual int32_t indexOfElementWithNextUnit(int32_t i, int32_t byteIndex, UChar byte) const; 126 127 virtual UBool matchNodesCanHaveValues() const { return FALSE; } 128 129 virtual int32_t getMaxBranchLinearSubNodeLength() const { return BytesTrie::kMaxBranchLinearSubNodeLength; } 130 virtual int32_t getMinLinearMatch() const { return BytesTrie::kMinLinearMatch; } 131 virtual int32_t getMaxLinearMatchLength() const { return BytesTrie::kMaxLinearMatchLength; } 132 133 /** 134 * @internal 135 */ 136 class BTLinearMatchNode : public LinearMatchNode { 137 public: 138 BTLinearMatchNode(const char *units, int32_t len, Node *nextNode); 139 virtual UBool operator==(const Node &other) const; 140 virtual void write(StringTrieBuilder &builder); 141 private: 142 const char *s; 143 }; 144 145 virtual Node *createLinearMatchNode(int32_t i, int32_t byteIndex, int32_t length, 146 Node *nextNode) const; 147 148 UBool ensureCapacity(int32_t length); 149 virtual int32_t write(int32_t byte); 150 int32_t write(const char *b, int32_t length); 151 virtual int32_t writeElementUnits(int32_t i, int32_t byteIndex, int32_t length); 152 virtual int32_t writeValueAndFinal(int32_t i, UBool isFinal); 153 virtual int32_t writeValueAndType(UBool hasValue, int32_t value, int32_t node); 154 virtual int32_t writeDeltaTo(int32_t jumpTarget); 155 156 CharString *strings; // Pointer not object so we need not #include internal charstr.h. 157 BytesTrieElement *elements; 158 int32_t elementsCapacity; 159 int32_t elementsLength; 160 161 // Byte serialization of the trie. 162 // Grows from the back: bytesLength measures from the end of the buffer! 163 char *bytes; 164 int32_t bytesCapacity; 165 int32_t bytesLength; 166}; 167 168U_NAMESPACE_END 169 170#endif // __BYTESTRIEBUILDER_H__ 171