1b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho/* 2b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho******************************************************************************* 3f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius* Copyright (C) 2010-2014, International Business Machines 4b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho* Corporation and others. All Rights Reserved. 5b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho******************************************************************************* 6b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho* file name: bytestriebuilder.h 7b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho* encoding: US-ASCII 8b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho* tab size: 8 (not used) 9b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho* indentation:4 10b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho* 11b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho* created on: 2010sep25 12b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho* created by: Markus W. Scherer 13b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho*/ 14b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho 1554dcd9b6a06071f647dac967e9e267abb9410720Craig Cornelius/** 1654dcd9b6a06071f647dac967e9e267abb9410720Craig Cornelius * \file 1754dcd9b6a06071f647dac967e9e267abb9410720Craig Cornelius * \brief C++ API: Builder for icu::BytesTrie 1854dcd9b6a06071f647dac967e9e267abb9410720Craig Cornelius */ 1954dcd9b6a06071f647dac967e9e267abb9410720Craig Cornelius 20b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho#ifndef __BYTESTRIEBUILDER_H__ 21b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho#define __BYTESTRIEBUILDER_H__ 22b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho 23b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho#include "unicode/utypes.h" 24b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho#include "unicode/bytestrie.h" 25b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho#include "unicode/stringpiece.h" 26b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho#include "unicode/stringtriebuilder.h" 27b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho 28b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2clairehoU_NAMESPACE_BEGIN 29b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho 30b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2clairehoclass BytesTrieElement; 31b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2clairehoclass CharString; 32b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho 33b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho/** 34b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho * Builder class for BytesTrie. 35b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho * 36b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho * This class is not intended for public subclassing. 37103e9ffba2cba345d0078eb8b8db33249f81840aCraig Cornelius * @stable ICU 4.8 38b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho */ 39b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2clairehoclass U_COMMON_API BytesTrieBuilder : public StringTrieBuilder { 40b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2clairehopublic: 41b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho /** 42b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho * Constructs an empty builder. 43b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho * @param errorCode Standard ICU error code. 44103e9ffba2cba345d0078eb8b8db33249f81840aCraig Cornelius * @stable ICU 4.8 45b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho */ 46b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho BytesTrieBuilder(UErrorCode &errorCode); 47b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho 48b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho /** 49b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho * Destructor. 50103e9ffba2cba345d0078eb8b8db33249f81840aCraig Cornelius * @stable ICU 4.8 51b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho */ 52b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho virtual ~BytesTrieBuilder(); 53b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho 54b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho /** 55b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho * Adds a (byte sequence, value) pair. 56b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho * The byte sequence must be unique. 57b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho * The bytes will be copied; the builder does not keep 58b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho * a reference to the input StringPiece or its data(). 59b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho * @param s The input byte sequence. 60b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho * @param value The value associated with this byte sequence. 61b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho * @param errorCode Standard ICU error code. Its input value must 62b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho * pass the U_SUCCESS() test, or else the function returns 63b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho * immediately. Check for U_FAILURE() on output or use with 64b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho * function chaining. (See User Guide for details.) 65b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho * @return *this 66103e9ffba2cba345d0078eb8b8db33249f81840aCraig Cornelius * @stable ICU 4.8 67b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho */ 68b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho BytesTrieBuilder &add(const StringPiece &s, int32_t value, UErrorCode &errorCode); 69b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho 70b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho /** 71b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho * Builds a BytesTrie for the add()ed data. 72b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho * Once built, no further data can be add()ed until clear() is called. 73b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho * 7459d709d503bab6e2b61931737e662dd293b40578ccornelius * A BytesTrie cannot be empty. At least one (byte sequence, value) pair 7559d709d503bab6e2b61931737e662dd293b40578ccornelius * must have been add()ed. 7659d709d503bab6e2b61931737e662dd293b40578ccornelius * 77b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho * This method passes ownership of the builder's internal result array to the new trie object. 78b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho * Another call to any build() variant will re-serialize the trie. 79b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho * After clear() has been called, a new array will be used as well. 80b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho * @param buildOption Build option, see UStringTrieBuildOption. 81b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho * @param errorCode Standard ICU error code. Its input value must 82b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho * pass the U_SUCCESS() test, or else the function returns 83b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho * immediately. Check for U_FAILURE() on output or use with 84b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho * function chaining. (See User Guide for details.) 85b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho * @return A new BytesTrie for the add()ed data. 86103e9ffba2cba345d0078eb8b8db33249f81840aCraig Cornelius * @stable ICU 4.8 87b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho */ 88b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho BytesTrie *build(UStringTrieBuildOption buildOption, UErrorCode &errorCode); 89b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho 90b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho /** 91b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho * Builds a BytesTrie for the add()ed data and byte-serializes it. 92b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho * Once built, no further data can be add()ed until clear() is called. 93b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho * 9459d709d503bab6e2b61931737e662dd293b40578ccornelius * A BytesTrie cannot be empty. At least one (byte sequence, value) pair 9559d709d503bab6e2b61931737e662dd293b40578ccornelius * must have been add()ed. 9659d709d503bab6e2b61931737e662dd293b40578ccornelius * 97b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho * Multiple calls to buildStringPiece() return StringPieces referring to the 98b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho * builder's same byte array, without rebuilding. 99b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho * If buildStringPiece() is called after build(), the trie will be 100b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho * re-serialized into a new array. 101b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho * If build() is called after buildStringPiece(), the trie object will become 102b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho * the owner of the previously returned array. 103b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho * After clear() has been called, a new array will be used as well. 104b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho * @param buildOption Build option, see UStringTrieBuildOption. 105b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho * @param errorCode Standard ICU error code. Its input value must 106b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho * pass the U_SUCCESS() test, or else the function returns 107b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho * immediately. Check for U_FAILURE() on output or use with 108b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho * function chaining. (See User Guide for details.) 109b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho * @return A StringPiece which refers to the byte-serialized BytesTrie for the add()ed data. 110103e9ffba2cba345d0078eb8b8db33249f81840aCraig Cornelius * @stable ICU 4.8 111b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho */ 112b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho StringPiece buildStringPiece(UStringTrieBuildOption buildOption, UErrorCode &errorCode); 113b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho 114b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho /** 115b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho * Removes all (byte sequence, value) pairs. 116b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho * New data can then be add()ed and a new trie can be built. 117b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho * @return *this 118103e9ffba2cba345d0078eb8b8db33249f81840aCraig Cornelius * @stable ICU 4.8 119b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho */ 120b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho BytesTrieBuilder &clear(); 121b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho 122b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2clairehoprivate: 123b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho BytesTrieBuilder(const BytesTrieBuilder &other); // no copy constructor 124b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho BytesTrieBuilder &operator=(const BytesTrieBuilder &other); // no assignment operator 125b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho 126b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho void buildBytes(UStringTrieBuildOption buildOption, UErrorCode &errorCode); 127b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho 128b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho virtual int32_t getElementStringLength(int32_t i) const; 129b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho virtual UChar getElementUnit(int32_t i, int32_t byteIndex) const; 130b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho virtual int32_t getElementValue(int32_t i) const; 131b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho 132b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho virtual int32_t getLimitOfLinearMatch(int32_t first, int32_t last, int32_t byteIndex) const; 133b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho 134b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho virtual int32_t countElementUnits(int32_t start, int32_t limit, int32_t byteIndex) const; 135b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho virtual int32_t skipElementsBySomeUnits(int32_t i, int32_t byteIndex, int32_t count) const; 136b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho virtual int32_t indexOfElementWithNextUnit(int32_t i, int32_t byteIndex, UChar byte) const; 137b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho 138b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho virtual UBool matchNodesCanHaveValues() const { return FALSE; } 139b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho 140b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho virtual int32_t getMaxBranchLinearSubNodeLength() const { return BytesTrie::kMaxBranchLinearSubNodeLength; } 141b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho virtual int32_t getMinLinearMatch() const { return BytesTrie::kMinLinearMatch; } 142b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho virtual int32_t getMaxLinearMatchLength() const { return BytesTrie::kMaxLinearMatchLength; } 143b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho 144b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho /** 145b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho * @internal 146b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho */ 147b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho class BTLinearMatchNode : public LinearMatchNode { 148b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho public: 149b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho BTLinearMatchNode(const char *units, int32_t len, Node *nextNode); 150b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho virtual UBool operator==(const Node &other) const; 151b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho virtual void write(StringTrieBuilder &builder); 152b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho private: 153b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho const char *s; 154b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho }; 155b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho 156b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho virtual Node *createLinearMatchNode(int32_t i, int32_t byteIndex, int32_t length, 157b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho Node *nextNode) const; 158b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho 159b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho UBool ensureCapacity(int32_t length); 160b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho virtual int32_t write(int32_t byte); 161b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho int32_t write(const char *b, int32_t length); 162b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho virtual int32_t writeElementUnits(int32_t i, int32_t byteIndex, int32_t length); 163b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho virtual int32_t writeValueAndFinal(int32_t i, UBool isFinal); 164b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho virtual int32_t writeValueAndType(UBool hasValue, int32_t value, int32_t node); 165b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho virtual int32_t writeDeltaTo(int32_t jumpTarget); 166b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho 167b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho CharString *strings; // Pointer not object so we need not #include internal charstr.h. 168b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho BytesTrieElement *elements; 169b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho int32_t elementsCapacity; 170b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho int32_t elementsLength; 171b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho 172b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho // Byte serialization of the trie. 173b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho // Grows from the back: bytesLength measures from the end of the buffer! 174b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho char *bytes; 175b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho int32_t bytesCapacity; 176b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho int32_t bytesLength; 177b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho}; 178b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho 179b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2clairehoU_NAMESPACE_END 180b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho 181b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho#endif // __BYTESTRIEBUILDER_H__ 182