1b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho/* 2b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho******************************************************************************* 3b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho* Copyright (C) 2010-2011, International Business Machines 4b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho* Corporation and others. All Rights Reserved. 5b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho******************************************************************************* 6b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho* file name: stringtriebuilder.h 7b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho* encoding: US-ASCII 8b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho* tab size: 8 (not used) 9b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho* indentation:4 10b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho* 11b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho* created on: 2010dec24 12b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho* created by: Markus W. Scherer 13b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho*/ 14b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho 15b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho#ifndef __STRINGTRIEBUILDER_H__ 16b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho#define __STRINGTRIEBUILDER_H__ 17b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho 18b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho#include "unicode/utypes.h" 19b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho#include "unicode/uobject.h" 20b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho 21b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho// Forward declaration. 22b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2clairehostruct UHashtable; 23b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2clairehotypedef struct UHashtable UHashtable; 24b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho 25b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho/** 26b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho * Build options for BytesTrieBuilder and CharsTrieBuilder. 27b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho * @draft ICU 4.8 28b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho */ 29b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2clairehoenum UStringTrieBuildOption { 30b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho /** 31b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho * Builds a trie quickly. 32b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho * @draft ICU 4.8 33b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho */ 34b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho USTRINGTRIE_BUILD_FAST, 35b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho /** 36b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho * Builds a trie more slowly, attempting to generate 37b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho * a shorter but equivalent serialization. 38b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho * This build option also uses more memory. 39b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho * 40b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho * This option can be effective when many integer values are the same 41b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho * and string/byte sequence suffixes can be shared. 42b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho * Runtime speed is not expected to improve. 43b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho * @draft ICU 4.8 44b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho */ 45b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho USTRINGTRIE_BUILD_SMALL 46b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho}; 47b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho 48b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2clairehoU_NAMESPACE_BEGIN 49b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho 50b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho/** 51b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho * Base class for string trie builder classes. 52b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho * 53b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho * This class is not intended for public subclassing. 54b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho * @draft ICU 4.8 55b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho */ 56b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2clairehoclass U_COMMON_API StringTrieBuilder : public UObject { 57b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2clairehopublic: 58b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho /** @internal */ 59b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho static UBool hashNode(const void *node); 60b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho /** @internal */ 61b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho static UBool equalNodes(const void *left, const void *right); 62b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho 63b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2clairehoprotected: 64b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho /** @internal */ 65b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho StringTrieBuilder(); 66b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho /** @internal */ 67b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho virtual ~StringTrieBuilder(); 68b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho 69b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho /** @internal */ 70b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho void createCompactBuilder(int32_t sizeGuess, UErrorCode &errorCode); 71b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho /** @internal */ 72b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho void deleteCompactBuilder(); 73b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho 74b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho /** @internal */ 75b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho void build(UStringTrieBuildOption buildOption, int32_t elementsLength, UErrorCode &errorCode); 76b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho 77b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho /** @internal */ 78b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho int32_t writeNode(int32_t start, int32_t limit, int32_t unitIndex); 79b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho /** @internal */ 80b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho int32_t writeBranchSubNode(int32_t start, int32_t limit, int32_t unitIndex, int32_t length); 81b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho 82b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho class Node; 83b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho 84b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho /** @internal */ 85b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho Node *makeNode(int32_t start, int32_t limit, int32_t unitIndex, UErrorCode &errorCode); 86b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho /** @internal */ 87b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho Node *makeBranchSubNode(int32_t start, int32_t limit, int32_t unitIndex, 88b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho int32_t length, UErrorCode &errorCode); 89b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho 90b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho /** @internal */ 91b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho virtual int32_t getElementStringLength(int32_t i) const = 0; 92b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho /** @internal */ 93b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho virtual UChar getElementUnit(int32_t i, int32_t unitIndex) const = 0; 94b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho /** @internal */ 95b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho virtual int32_t getElementValue(int32_t i) const = 0; 96b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho 97b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho // Finds the first unit index after this one where 98b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho // the first and last element have different units again. 99b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho /** @internal */ 100b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho virtual int32_t getLimitOfLinearMatch(int32_t first, int32_t last, int32_t unitIndex) const = 0; 101b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho 102b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho // Number of different units at unitIndex. 103b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho /** @internal */ 104b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho virtual int32_t countElementUnits(int32_t start, int32_t limit, int32_t unitIndex) const = 0; 105b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho /** @internal */ 106b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho virtual int32_t skipElementsBySomeUnits(int32_t i, int32_t unitIndex, int32_t count) const = 0; 107b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho /** @internal */ 108b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho virtual int32_t indexOfElementWithNextUnit(int32_t i, int32_t unitIndex, UChar unit) const = 0; 109b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho 110b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho /** @internal */ 111b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho virtual UBool matchNodesCanHaveValues() const = 0; 112b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho 113b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho /** @internal */ 114b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho virtual int32_t getMaxBranchLinearSubNodeLength() const = 0; 115b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho /** @internal */ 116b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho virtual int32_t getMinLinearMatch() const = 0; 117b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho /** @internal */ 118b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho virtual int32_t getMaxLinearMatchLength() const = 0; 119b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho 120b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho // max(BytesTrie::kMaxBranchLinearSubNodeLength, UCharsTrie::kMaxBranchLinearSubNodeLength). 121b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho /** @internal */ 122b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho static const int32_t kMaxBranchLinearSubNodeLength=5; 123b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho 124b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho // Maximum number of nested split-branch levels for a branch on all 2^16 possible UChar units. 125b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho // log2(2^16/kMaxBranchLinearSubNodeLength) rounded up. 126b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho /** @internal */ 127b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho static const int32_t kMaxSplitBranchLevels=14; 128b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho 129b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho /** 130b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho * Makes sure that there is only one unique node registered that is 131b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho * equivalent to newNode. 132b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho * @param newNode Input node. The builder takes ownership. 133b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho * @param errorCode ICU in/out UErrorCode. 134b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho Set to U_MEMORY_ALLOCATION_ERROR if it was success but newNode==NULL. 135b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho * @return newNode if it is the first of its kind, or 136b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho * an equivalent node if newNode is a duplicate. 137b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho * @internal 138b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho */ 139b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho Node *registerNode(Node *newNode, UErrorCode &errorCode); 140b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho /** 141b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho * Makes sure that there is only one unique FinalValueNode registered 142b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho * with this value. 143b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho * Avoids creating a node if the value is a duplicate. 144b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho * @param value A final value. 145b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho * @param errorCode ICU in/out UErrorCode. 146b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho Set to U_MEMORY_ALLOCATION_ERROR if it was success but newNode==NULL. 147b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho * @return A FinalValueNode with the given value. 148b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho * @internal 149b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho */ 150b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho Node *registerFinalValue(int32_t value, UErrorCode &errorCode); 151b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho 152b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho /* 153b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho * C++ note: 154b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho * registerNode() and registerFinalValue() take ownership of their input nodes, 155b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho * and only return owned nodes. 156b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho * If they see a failure UErrorCode, they will delete the input node. 157b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho * If they get a NULL pointer, they will record a U_MEMORY_ALLOCATION_ERROR. 158b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho * If there is a failure, they return NULL. 159b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho * 160b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho * NULL Node pointers can be safely passed into other Nodes because 161b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho * they call the static Node::hashCode() which checks for a NULL pointer first. 162b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho * 163b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho * Therefore, as long as builder functions register a new node, 164b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho * they need to check for failures only before explicitly dereferencing 165b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho * a Node pointer, or before setting a new UErrorCode. 166b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho */ 167b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho 168b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho // Hash set of nodes, maps from nodes to integer 1. 169b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho /** @internal */ 170b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho UHashtable *nodes; 171b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho 172b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho /** @internal */ 173b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho class Node : public UObject { 174b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho public: 175b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho Node(int32_t initialHash) : hash(initialHash), offset(0) {} 176b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho inline int32_t hashCode() const { return hash; } 177b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho // Handles node==NULL. 178b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho static inline int32_t hashCode(const Node *node) { return node==NULL ? 0 : node->hashCode(); } 179b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho // Base class operator==() compares the actual class types. 180b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho virtual UBool operator==(const Node &other) const; 181b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho inline UBool operator!=(const Node &other) const { return !operator==(other); } 182b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho /** 183b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho * Traverses the Node graph and numbers branch edges, with rightmost edges first. 184b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho * This is to avoid writing a duplicate node twice. 185b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho * 186b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho * Branch nodes in this trie data structure are not symmetric. 187b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho * Most branch edges "jump" to other nodes but the rightmost branch edges 188b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho * just continue without a jump. 189b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho * Therefore, write() must write the rightmost branch edge last 190b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho * (trie units are written backwards), and must write it at that point even if 191b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho * it is a duplicate of a node previously written elsewhere. 192b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho * 193b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho * This function visits and marks right branch edges first. 194b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho * Edges are numbered with increasingly negative values because we share the 195b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho * offset field which gets positive values when nodes are written. 196b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho * A branch edge also remembers the first number for any of its edges. 197b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho * 198b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho * When a further-left branch edge has a number in the range of the rightmost 199b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho * edge's numbers, then it will be written as part of the required right edge 200b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho * and we can avoid writing it first. 201b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho * 202b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho * After root.markRightEdgesFirst(-1) the offsets of all nodes are negative 203b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho * edge numbers. 204b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho * 205b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho * @param edgeNumber The first edge number for this node and its sub-nodes. 206b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho * @return An edge number that is at least the maximum-negative 207b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho * of the input edge number and the numbers of this node and all of its sub-nodes. 208b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho */ 209b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho virtual int32_t markRightEdgesFirst(int32_t edgeNumber); 210b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho // write() must set the offset to a positive value. 211b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho virtual void write(StringTrieBuilder &builder) = 0; 212b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho // See markRightEdgesFirst. 213b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho inline void writeUnlessInsideRightEdge(int32_t firstRight, int32_t lastRight, 214b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho StringTrieBuilder &builder) { 215b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho // Note: Edge numbers are negative, lastRight<=firstRight. 216b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho // If offset>0 then this node and its sub-nodes have been written already 217b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho // and we need not write them again. 218b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho // If this node is part of the unwritten right branch edge, 219b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho // then we wait until that is written. 220b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho if(offset<0 && (offset<lastRight || firstRight<offset)) { 221b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho write(builder); 222b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho } 223b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho } 224b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho inline int32_t getOffset() const { return offset; } 225b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho protected: 226b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho int32_t hash; 227b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho int32_t offset; 228b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho private: 229b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho // No ICU "poor man's RTTI" for this class nor its subclasses. 230b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho virtual UClassID getDynamicClassID() const; 231b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho }; 232b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho 233b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho // This class should not be overridden because 234b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho // registerFinalValue() compares a stack-allocated FinalValueNode 235b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho // (stack-allocated so that we don't unnecessarily create lots of duplicate nodes) 236b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho // with the input node, and the 237b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho // !Node::operator==(other) used inside FinalValueNode::operator==(other) 238b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho // will be false if the typeid's are different. 239b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho /** @internal */ 240b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho class FinalValueNode : public Node { 241b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho public: 242b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho FinalValueNode(int32_t v) : Node(0x111111*37+v), value(v) {} 243b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho virtual UBool operator==(const Node &other) const; 244b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho virtual void write(StringTrieBuilder &builder); 245b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho protected: 246b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho int32_t value; 247b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho }; 248b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho 249b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho /** @internal */ 250b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho class ValueNode : public Node { 251b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho public: 252b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho ValueNode(int32_t initialHash) : Node(initialHash), hasValue(FALSE), value(0) {} 253b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho virtual UBool operator==(const Node &other) const; 254b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho void setValue(int32_t v) { 255b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho hasValue=TRUE; 256b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho value=v; 257b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho hash=hash*37+v; 258b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho } 259b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho protected: 260b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho UBool hasValue; 261b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho int32_t value; 262b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho }; 263b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho 264b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho /** @internal */ 265b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho class IntermediateValueNode : public ValueNode { 266b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho public: 267b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho IntermediateValueNode(int32_t v, Node *nextNode) 268b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho : ValueNode(0x222222*37+hashCode(nextNode)), next(nextNode) { setValue(v); } 269b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho virtual UBool operator==(const Node &other) const; 270b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho virtual int32_t markRightEdgesFirst(int32_t edgeNumber); 271b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho virtual void write(StringTrieBuilder &builder); 272b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho protected: 273b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho Node *next; 274b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho }; 275b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho 276b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho /** @internal */ 277b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho class LinearMatchNode : public ValueNode { 278b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho public: 279b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho LinearMatchNode(int32_t len, Node *nextNode) 280b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho : ValueNode((0x333333*37+len)*37+hashCode(nextNode)), 281b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho length(len), next(nextNode) {} 282b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho virtual UBool operator==(const Node &other) const; 283b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho virtual int32_t markRightEdgesFirst(int32_t edgeNumber); 284b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho protected: 285b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho int32_t length; 286b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho Node *next; 287b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho }; 288b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho 289b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho /** @internal */ 290b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho class BranchNode : public Node { 291b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho public: 292b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho BranchNode(int32_t initialHash) : Node(initialHash) {} 293b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho protected: 294b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho int32_t firstEdgeNumber; 295b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho }; 296b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho 297b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho /** @internal */ 298b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho class ListBranchNode : public BranchNode { 299b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho public: 300b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho ListBranchNode() : BranchNode(0x444444), length(0) {} 301b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho virtual UBool operator==(const Node &other) const; 302b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho virtual int32_t markRightEdgesFirst(int32_t edgeNumber); 303b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho virtual void write(StringTrieBuilder &builder); 304b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho // Adds a unit with a final value. 305b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho void add(int32_t c, int32_t value) { 306b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho units[length]=(UChar)c; 307b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho equal[length]=NULL; 308b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho values[length]=value; 309b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho ++length; 310b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho hash=(hash*37+c)*37+value; 311b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho } 312b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho // Adds a unit which leads to another match node. 313b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho void add(int32_t c, Node *node) { 314b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho units[length]=(UChar)c; 315b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho equal[length]=node; 316b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho values[length]=0; 317b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho ++length; 318b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho hash=(hash*37+c)*37+hashCode(node); 319b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho } 320b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho protected: 321b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho Node *equal[kMaxBranchLinearSubNodeLength]; // NULL means "has final value". 322b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho int32_t length; 323b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho int32_t values[kMaxBranchLinearSubNodeLength]; 324b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho UChar units[kMaxBranchLinearSubNodeLength]; 325b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho }; 326b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho 327b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho /** @internal */ 328b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho class SplitBranchNode : public BranchNode { 329b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho public: 330b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho SplitBranchNode(UChar middleUnit, Node *lessThanNode, Node *greaterOrEqualNode) 331b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho : BranchNode(((0x555555*37+middleUnit)*37+ 332b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho hashCode(lessThanNode))*37+hashCode(greaterOrEqualNode)), 333b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho unit(middleUnit), lessThan(lessThanNode), greaterOrEqual(greaterOrEqualNode) {} 334b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho virtual UBool operator==(const Node &other) const; 335b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho virtual int32_t markRightEdgesFirst(int32_t edgeNumber); 336b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho virtual void write(StringTrieBuilder &builder); 337b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho protected: 338b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho UChar unit; 339b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho Node *lessThan; 340b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho Node *greaterOrEqual; 341b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho }; 342b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho 343b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho // Branch head node, for writing the actual node lead unit. 344b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho /** @internal */ 345b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho class BranchHeadNode : public ValueNode { 346b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho public: 347b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho BranchHeadNode(int32_t len, Node *subNode) 348b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho : ValueNode((0x666666*37+len)*37+hashCode(subNode)), 349b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho length(len), next(subNode) {} 350b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho virtual UBool operator==(const Node &other) const; 351b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho virtual int32_t markRightEdgesFirst(int32_t edgeNumber); 352b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho virtual void write(StringTrieBuilder &builder); 353b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho protected: 354b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho int32_t length; 355b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho Node *next; // A branch sub-node. 356b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho }; 357b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho 358b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho /** @internal */ 359b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho virtual Node *createLinearMatchNode(int32_t i, int32_t unitIndex, int32_t length, 360b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho Node *nextNode) const = 0; 361b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho 362b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho /** @internal */ 363b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho virtual int32_t write(int32_t unit) = 0; 364b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho /** @internal */ 365b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho virtual int32_t writeElementUnits(int32_t i, int32_t unitIndex, int32_t length) = 0; 366b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho /** @internal */ 367b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho virtual int32_t writeValueAndFinal(int32_t i, UBool isFinal) = 0; 368b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho /** @internal */ 369b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho virtual int32_t writeValueAndType(UBool hasValue, int32_t value, int32_t node) = 0; 370b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho /** @internal */ 371b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho virtual int32_t writeDeltaTo(int32_t jumpTarget) = 0; 372b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho 373b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2clairehoprivate: 374b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho // No ICU "poor man's RTTI" for this class nor its subclasses. 375b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho virtual UClassID getDynamicClassID() const; 376b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho}; 377b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho 378b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2clairehoU_NAMESPACE_END 379b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho 380b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho#endif // __STRINGTRIEBUILDER_H__ 381