1b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho/* 2b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho******************************************************************************* 3103e9ffba2cba345d0078eb8b8db33249f81840aCraig Cornelius* Copyright (C) 2010-2012, International Business Machines 4b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho* Corporation and others. All Rights Reserved. 5b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho******************************************************************************* 6b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho* file name: stringtriebuilder.h 7b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho* encoding: US-ASCII 8b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho* tab size: 8 (not used) 9b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho* indentation:4 10b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho* 11b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho* created on: 2010dec24 12b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho* created by: Markus W. Scherer 13b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho*/ 14b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho 15b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho#ifndef __STRINGTRIEBUILDER_H__ 16b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho#define __STRINGTRIEBUILDER_H__ 17b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho 18b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho#include "unicode/utypes.h" 19b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho#include "unicode/uobject.h" 20b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho 2154dcd9b6a06071f647dac967e9e267abb9410720Craig Cornelius/** 2254dcd9b6a06071f647dac967e9e267abb9410720Craig Cornelius * \file 2354dcd9b6a06071f647dac967e9e267abb9410720Craig Cornelius * \brief C++ API: Builder API for trie builders 2454dcd9b6a06071f647dac967e9e267abb9410720Craig Cornelius */ 2554dcd9b6a06071f647dac967e9e267abb9410720Craig Cornelius 26b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho// Forward declaration. 27b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2clairehostruct UHashtable; 28b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2clairehotypedef struct UHashtable UHashtable; 29b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho 30b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho/** 31b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho * Build options for BytesTrieBuilder and CharsTrieBuilder. 32103e9ffba2cba345d0078eb8b8db33249f81840aCraig Cornelius * @stable ICU 4.8 33b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho */ 34b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2clairehoenum UStringTrieBuildOption { 35b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho /** 36b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho * Builds a trie quickly. 37103e9ffba2cba345d0078eb8b8db33249f81840aCraig Cornelius * @stable ICU 4.8 38b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho */ 39b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho USTRINGTRIE_BUILD_FAST, 40b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho /** 41b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho * Builds a trie more slowly, attempting to generate 42b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho * a shorter but equivalent serialization. 43b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho * This build option also uses more memory. 44b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho * 45b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho * This option can be effective when many integer values are the same 46b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho * and string/byte sequence suffixes can be shared. 47b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho * Runtime speed is not expected to improve. 48103e9ffba2cba345d0078eb8b8db33249f81840aCraig Cornelius * @stable ICU 4.8 49b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho */ 50b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho USTRINGTRIE_BUILD_SMALL 51b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho}; 52b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho 53b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2clairehoU_NAMESPACE_BEGIN 54b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho 55b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho/** 56b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho * Base class for string trie builder classes. 57b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho * 58b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho * This class is not intended for public subclassing. 59103e9ffba2cba345d0078eb8b8db33249f81840aCraig Cornelius * @stable ICU 4.8 60b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho */ 61b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2clairehoclass U_COMMON_API StringTrieBuilder : public UObject { 62b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2clairehopublic: 63103e9ffba2cba345d0078eb8b8db33249f81840aCraig Cornelius#ifndef U_HIDE_INTERNAL_API 64b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho /** @internal */ 65b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho static UBool hashNode(const void *node); 66b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho /** @internal */ 67b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho static UBool equalNodes(const void *left, const void *right); 68103e9ffba2cba345d0078eb8b8db33249f81840aCraig Cornelius#endif /* U_HIDE_INTERNAL_API */ 69b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho 70b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2clairehoprotected: 71103e9ffba2cba345d0078eb8b8db33249f81840aCraig Cornelius // Do not enclose the protected default constructor with #ifndef U_HIDE_INTERNAL_API 72103e9ffba2cba345d0078eb8b8db33249f81840aCraig Cornelius // or else the compiler will create a public default constructor. 73b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho /** @internal */ 74b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho StringTrieBuilder(); 75b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho /** @internal */ 76b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho virtual ~StringTrieBuilder(); 77b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho 78103e9ffba2cba345d0078eb8b8db33249f81840aCraig Cornelius#ifndef U_HIDE_INTERNAL_API 79b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho /** @internal */ 80b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho void createCompactBuilder(int32_t sizeGuess, UErrorCode &errorCode); 81b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho /** @internal */ 82b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho void deleteCompactBuilder(); 83b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho 84b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho /** @internal */ 85b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho void build(UStringTrieBuildOption buildOption, int32_t elementsLength, UErrorCode &errorCode); 86b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho 87b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho /** @internal */ 88b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho int32_t writeNode(int32_t start, int32_t limit, int32_t unitIndex); 89b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho /** @internal */ 90b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho int32_t writeBranchSubNode(int32_t start, int32_t limit, int32_t unitIndex, int32_t length); 91103e9ffba2cba345d0078eb8b8db33249f81840aCraig Cornelius#endif /* U_HIDE_INTERNAL_API */ 92b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho 93b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho class Node; 94b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho 95103e9ffba2cba345d0078eb8b8db33249f81840aCraig Cornelius#ifndef U_HIDE_INTERNAL_API 96b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho /** @internal */ 97b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho Node *makeNode(int32_t start, int32_t limit, int32_t unitIndex, UErrorCode &errorCode); 98b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho /** @internal */ 99b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho Node *makeBranchSubNode(int32_t start, int32_t limit, int32_t unitIndex, 100b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho int32_t length, UErrorCode &errorCode); 101103e9ffba2cba345d0078eb8b8db33249f81840aCraig Cornelius#endif /* U_HIDE_INTERNAL_API */ 102b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho 103b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho /** @internal */ 104b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho virtual int32_t getElementStringLength(int32_t i) const = 0; 105b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho /** @internal */ 106b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho virtual UChar getElementUnit(int32_t i, int32_t unitIndex) const = 0; 107b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho /** @internal */ 108b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho virtual int32_t getElementValue(int32_t i) const = 0; 109b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho 110b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho // Finds the first unit index after this one where 111b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho // the first and last element have different units again. 112b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho /** @internal */ 113b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho virtual int32_t getLimitOfLinearMatch(int32_t first, int32_t last, int32_t unitIndex) const = 0; 114b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho 115b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho // Number of different units at unitIndex. 116b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho /** @internal */ 117b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho virtual int32_t countElementUnits(int32_t start, int32_t limit, int32_t unitIndex) const = 0; 118b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho /** @internal */ 119b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho virtual int32_t skipElementsBySomeUnits(int32_t i, int32_t unitIndex, int32_t count) const = 0; 120b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho /** @internal */ 121b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho virtual int32_t indexOfElementWithNextUnit(int32_t i, int32_t unitIndex, UChar unit) const = 0; 122b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho 123b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho /** @internal */ 124b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho virtual UBool matchNodesCanHaveValues() const = 0; 125b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho 126b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho /** @internal */ 127b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho virtual int32_t getMaxBranchLinearSubNodeLength() const = 0; 128b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho /** @internal */ 129b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho virtual int32_t getMinLinearMatch() const = 0; 130b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho /** @internal */ 131b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho virtual int32_t getMaxLinearMatchLength() const = 0; 132b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho 133103e9ffba2cba345d0078eb8b8db33249f81840aCraig Cornelius#ifndef U_HIDE_INTERNAL_API 134b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho // max(BytesTrie::kMaxBranchLinearSubNodeLength, UCharsTrie::kMaxBranchLinearSubNodeLength). 135b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho /** @internal */ 136b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho static const int32_t kMaxBranchLinearSubNodeLength=5; 137b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho 138b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho // Maximum number of nested split-branch levels for a branch on all 2^16 possible UChar units. 139b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho // log2(2^16/kMaxBranchLinearSubNodeLength) rounded up. 140b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho /** @internal */ 141b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho static const int32_t kMaxSplitBranchLevels=14; 142b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho 143b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho /** 144b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho * Makes sure that there is only one unique node registered that is 145b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho * equivalent to newNode. 146b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho * @param newNode Input node. The builder takes ownership. 147b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho * @param errorCode ICU in/out UErrorCode. 148b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho Set to U_MEMORY_ALLOCATION_ERROR if it was success but newNode==NULL. 149b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho * @return newNode if it is the first of its kind, or 150b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho * an equivalent node if newNode is a duplicate. 151b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho * @internal 152b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho */ 153b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho Node *registerNode(Node *newNode, UErrorCode &errorCode); 154b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho /** 155b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho * Makes sure that there is only one unique FinalValueNode registered 156b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho * with this value. 157b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho * Avoids creating a node if the value is a duplicate. 158b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho * @param value A final value. 159b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho * @param errorCode ICU in/out UErrorCode. 160b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho Set to U_MEMORY_ALLOCATION_ERROR if it was success but newNode==NULL. 161b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho * @return A FinalValueNode with the given value. 162b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho * @internal 163b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho */ 164b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho Node *registerFinalValue(int32_t value, UErrorCode &errorCode); 165b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho 166b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho /* 167b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho * C++ note: 168b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho * registerNode() and registerFinalValue() take ownership of their input nodes, 169b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho * and only return owned nodes. 170b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho * If they see a failure UErrorCode, they will delete the input node. 171b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho * If they get a NULL pointer, they will record a U_MEMORY_ALLOCATION_ERROR. 172b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho * If there is a failure, they return NULL. 173b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho * 174b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho * NULL Node pointers can be safely passed into other Nodes because 175b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho * they call the static Node::hashCode() which checks for a NULL pointer first. 176b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho * 177b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho * Therefore, as long as builder functions register a new node, 178b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho * they need to check for failures only before explicitly dereferencing 179b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho * a Node pointer, or before setting a new UErrorCode. 180b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho */ 181b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho 182b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho // Hash set of nodes, maps from nodes to integer 1. 183b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho /** @internal */ 184b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho UHashtable *nodes; 185b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho 186b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho /** @internal */ 187b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho class Node : public UObject { 188b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho public: 189b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho Node(int32_t initialHash) : hash(initialHash), offset(0) {} 190b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho inline int32_t hashCode() const { return hash; } 191b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho // Handles node==NULL. 192b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho static inline int32_t hashCode(const Node *node) { return node==NULL ? 0 : node->hashCode(); } 193b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho // Base class operator==() compares the actual class types. 194b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho virtual UBool operator==(const Node &other) const; 195b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho inline UBool operator!=(const Node &other) const { return !operator==(other); } 196b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho /** 197b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho * Traverses the Node graph and numbers branch edges, with rightmost edges first. 198b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho * This is to avoid writing a duplicate node twice. 199b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho * 200b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho * Branch nodes in this trie data structure are not symmetric. 201b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho * Most branch edges "jump" to other nodes but the rightmost branch edges 202b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho * just continue without a jump. 203b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho * Therefore, write() must write the rightmost branch edge last 204b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho * (trie units are written backwards), and must write it at that point even if 205b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho * it is a duplicate of a node previously written elsewhere. 206b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho * 207b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho * This function visits and marks right branch edges first. 208b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho * Edges are numbered with increasingly negative values because we share the 209b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho * offset field which gets positive values when nodes are written. 210b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho * A branch edge also remembers the first number for any of its edges. 211b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho * 212b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho * When a further-left branch edge has a number in the range of the rightmost 213b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho * edge's numbers, then it will be written as part of the required right edge 214b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho * and we can avoid writing it first. 215b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho * 216b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho * After root.markRightEdgesFirst(-1) the offsets of all nodes are negative 217b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho * edge numbers. 218b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho * 219b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho * @param edgeNumber The first edge number for this node and its sub-nodes. 220b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho * @return An edge number that is at least the maximum-negative 221b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho * of the input edge number and the numbers of this node and all of its sub-nodes. 222b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho */ 223b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho virtual int32_t markRightEdgesFirst(int32_t edgeNumber); 224b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho // write() must set the offset to a positive value. 225b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho virtual void write(StringTrieBuilder &builder) = 0; 226b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho // See markRightEdgesFirst. 227b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho inline void writeUnlessInsideRightEdge(int32_t firstRight, int32_t lastRight, 228b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho StringTrieBuilder &builder) { 229b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho // Note: Edge numbers are negative, lastRight<=firstRight. 230b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho // If offset>0 then this node and its sub-nodes have been written already 231b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho // and we need not write them again. 232b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho // If this node is part of the unwritten right branch edge, 233b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho // then we wait until that is written. 234b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho if(offset<0 && (offset<lastRight || firstRight<offset)) { 235b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho write(builder); 236b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho } 237b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho } 238b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho inline int32_t getOffset() const { return offset; } 239b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho protected: 240b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho int32_t hash; 241b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho int32_t offset; 242b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho }; 243b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho 244b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho // This class should not be overridden because 245b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho // registerFinalValue() compares a stack-allocated FinalValueNode 246b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho // (stack-allocated so that we don't unnecessarily create lots of duplicate nodes) 247b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho // with the input node, and the 248b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho // !Node::operator==(other) used inside FinalValueNode::operator==(other) 249b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho // will be false if the typeid's are different. 250b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho /** @internal */ 251b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho class FinalValueNode : public Node { 252b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho public: 253b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho FinalValueNode(int32_t v) : Node(0x111111*37+v), value(v) {} 254b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho virtual UBool operator==(const Node &other) const; 255b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho virtual void write(StringTrieBuilder &builder); 256b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho protected: 257b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho int32_t value; 258b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho }; 259b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho 26054dcd9b6a06071f647dac967e9e267abb9410720Craig Cornelius /** 26154dcd9b6a06071f647dac967e9e267abb9410720Craig Cornelius * @internal 26254dcd9b6a06071f647dac967e9e267abb9410720Craig Cornelius */ 263b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho class ValueNode : public Node { 264b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho public: 265b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho ValueNode(int32_t initialHash) : Node(initialHash), hasValue(FALSE), value(0) {} 266b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho virtual UBool operator==(const Node &other) const; 267b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho void setValue(int32_t v) { 268b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho hasValue=TRUE; 269b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho value=v; 270b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho hash=hash*37+v; 271b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho } 272b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho protected: 273b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho UBool hasValue; 274b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho int32_t value; 275b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho }; 276b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho 27754dcd9b6a06071f647dac967e9e267abb9410720Craig Cornelius /** 27854dcd9b6a06071f647dac967e9e267abb9410720Craig Cornelius * @internal 27954dcd9b6a06071f647dac967e9e267abb9410720Craig Cornelius */ 280b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho class IntermediateValueNode : public ValueNode { 281b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho public: 282b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho IntermediateValueNode(int32_t v, Node *nextNode) 283b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho : ValueNode(0x222222*37+hashCode(nextNode)), next(nextNode) { setValue(v); } 284b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho virtual UBool operator==(const Node &other) const; 285b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho virtual int32_t markRightEdgesFirst(int32_t edgeNumber); 286b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho virtual void write(StringTrieBuilder &builder); 287b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho protected: 288b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho Node *next; 289b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho }; 290b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho 29154dcd9b6a06071f647dac967e9e267abb9410720Craig Cornelius /** 29254dcd9b6a06071f647dac967e9e267abb9410720Craig Cornelius * @internal 29354dcd9b6a06071f647dac967e9e267abb9410720Craig Cornelius */ 294b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho class LinearMatchNode : public ValueNode { 295b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho public: 296b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho LinearMatchNode(int32_t len, Node *nextNode) 297b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho : ValueNode((0x333333*37+len)*37+hashCode(nextNode)), 298b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho length(len), next(nextNode) {} 299b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho virtual UBool operator==(const Node &other) const; 300b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho virtual int32_t markRightEdgesFirst(int32_t edgeNumber); 301b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho protected: 302b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho int32_t length; 303b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho Node *next; 304b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho }; 305b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho 30654dcd9b6a06071f647dac967e9e267abb9410720Craig Cornelius /** 30754dcd9b6a06071f647dac967e9e267abb9410720Craig Cornelius * @internal 30854dcd9b6a06071f647dac967e9e267abb9410720Craig Cornelius */ 309b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho class BranchNode : public Node { 310b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho public: 311b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho BranchNode(int32_t initialHash) : Node(initialHash) {} 312b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho protected: 313b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho int32_t firstEdgeNumber; 314b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho }; 315b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho 31654dcd9b6a06071f647dac967e9e267abb9410720Craig Cornelius /** 31754dcd9b6a06071f647dac967e9e267abb9410720Craig Cornelius * @internal 31854dcd9b6a06071f647dac967e9e267abb9410720Craig Cornelius */ 319b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho class ListBranchNode : public BranchNode { 320b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho public: 321b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho ListBranchNode() : BranchNode(0x444444), length(0) {} 322b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho virtual UBool operator==(const Node &other) const; 323b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho virtual int32_t markRightEdgesFirst(int32_t edgeNumber); 324b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho virtual void write(StringTrieBuilder &builder); 325b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho // Adds a unit with a final value. 326b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho void add(int32_t c, int32_t value) { 327b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho units[length]=(UChar)c; 328b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho equal[length]=NULL; 329b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho values[length]=value; 330b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho ++length; 331b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho hash=(hash*37+c)*37+value; 332b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho } 333b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho // Adds a unit which leads to another match node. 334b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho void add(int32_t c, Node *node) { 335b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho units[length]=(UChar)c; 336b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho equal[length]=node; 337b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho values[length]=0; 338b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho ++length; 339b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho hash=(hash*37+c)*37+hashCode(node); 340b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho } 341b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho protected: 342b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho Node *equal[kMaxBranchLinearSubNodeLength]; // NULL means "has final value". 343b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho int32_t length; 344b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho int32_t values[kMaxBranchLinearSubNodeLength]; 345b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho UChar units[kMaxBranchLinearSubNodeLength]; 346b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho }; 347b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho 34854dcd9b6a06071f647dac967e9e267abb9410720Craig Cornelius /** 34954dcd9b6a06071f647dac967e9e267abb9410720Craig Cornelius * @internal 35054dcd9b6a06071f647dac967e9e267abb9410720Craig Cornelius */ 351b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho class SplitBranchNode : public BranchNode { 352b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho public: 353b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho SplitBranchNode(UChar middleUnit, Node *lessThanNode, Node *greaterOrEqualNode) 354b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho : BranchNode(((0x555555*37+middleUnit)*37+ 355b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho hashCode(lessThanNode))*37+hashCode(greaterOrEqualNode)), 356b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho unit(middleUnit), lessThan(lessThanNode), greaterOrEqual(greaterOrEqualNode) {} 357b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho virtual UBool operator==(const Node &other) const; 358b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho virtual int32_t markRightEdgesFirst(int32_t edgeNumber); 359b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho virtual void write(StringTrieBuilder &builder); 360b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho protected: 361b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho UChar unit; 362b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho Node *lessThan; 363b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho Node *greaterOrEqual; 364b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho }; 365b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho 366b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho // Branch head node, for writing the actual node lead unit. 367b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho /** @internal */ 368b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho class BranchHeadNode : public ValueNode { 369b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho public: 370b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho BranchHeadNode(int32_t len, Node *subNode) 371b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho : ValueNode((0x666666*37+len)*37+hashCode(subNode)), 372b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho length(len), next(subNode) {} 373b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho virtual UBool operator==(const Node &other) const; 374b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho virtual int32_t markRightEdgesFirst(int32_t edgeNumber); 375b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho virtual void write(StringTrieBuilder &builder); 376b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho protected: 377b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho int32_t length; 378b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho Node *next; // A branch sub-node. 379b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho }; 380103e9ffba2cba345d0078eb8b8db33249f81840aCraig Cornelius#endif /* U_HIDE_INTERNAL_API */ 381b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho 382b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho /** @internal */ 383b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho virtual Node *createLinearMatchNode(int32_t i, int32_t unitIndex, int32_t length, 384b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho Node *nextNode) const = 0; 385b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho 386b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho /** @internal */ 387b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho virtual int32_t write(int32_t unit) = 0; 388b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho /** @internal */ 389b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho virtual int32_t writeElementUnits(int32_t i, int32_t unitIndex, int32_t length) = 0; 390b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho /** @internal */ 391b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho virtual int32_t writeValueAndFinal(int32_t i, UBool isFinal) = 0; 392b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho /** @internal */ 393b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho virtual int32_t writeValueAndType(UBool hasValue, int32_t value, int32_t node) = 0; 394b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho /** @internal */ 395b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho virtual int32_t writeDeltaTo(int32_t jumpTarget) = 0; 396b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho}; 397b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho 398b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2clairehoU_NAMESPACE_END 399b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho 400b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho#endif // __STRINGTRIEBUILDER_H__ 401