1b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho/* 2b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho******************************************************************************* 3b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho* Copyright (C) 2010-2011, International Business Machines 4b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho* Corporation and others. All Rights Reserved. 5b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho******************************************************************************* 6b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho* file name: bytestriebuilder.cpp 7b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho* encoding: US-ASCII 8b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho* tab size: 8 (not used) 9b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho* indentation:4 10b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho* 11b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho* created on: 2010sep25 12b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho* created by: Markus W. Scherer 13b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho*/ 14b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho 15b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho#include "unicode/utypes.h" 16b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho#include "unicode/bytestrie.h" 17b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho#include "unicode/bytestriebuilder.h" 18b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho#include "unicode/stringpiece.h" 19b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho#include "charstr.h" 20b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho#include "cmemory.h" 21b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho#include "uhash.h" 22b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho#include "uarrsort.h" 23b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho#include "uassert.h" 24b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho 25b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2clairehoU_NAMESPACE_BEGIN 26b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho 27b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho/* 28b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho * Note: This builder implementation stores (bytes, value) pairs with full copies 29b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho * of the byte sequences, until the BytesTrie is built. 30b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho * It might(!) take less memory if we collected the data in a temporary, dynamic trie. 31b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho */ 32b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho 33b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2clairehoclass BytesTrieElement : public UMemory { 34b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2clairehopublic: 35b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho // Use compiler's default constructor, initializes nothing. 36b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho 37b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho void setTo(const StringPiece &s, int32_t val, CharString &strings, UErrorCode &errorCode); 38b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho 39b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho StringPiece getString(const CharString &strings) const { 40b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho int32_t offset=stringOffset; 41b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho int32_t length; 42b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho if(offset>=0) { 43b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho length=(uint8_t)strings[offset++]; 44b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho } else { 45b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho offset=~offset; 46b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho length=((int32_t)(uint8_t)strings[offset]<<8)|(uint8_t)strings[offset+1]; 47b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho offset+=2; 48b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho } 49b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho return StringPiece(strings.data()+offset, length); 50b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho } 51b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho int32_t getStringLength(const CharString &strings) const { 52b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho int32_t offset=stringOffset; 53b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho if(offset>=0) { 54b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho return (uint8_t)strings[offset]; 55b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho } else { 56b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho offset=~offset; 57b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho return ((int32_t)(uint8_t)strings[offset]<<8)|(uint8_t)strings[offset+1]; 58b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho } 59b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho } 60b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho 61b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho char charAt(int32_t index, const CharString &strings) const { return data(strings)[index]; } 62b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho 63b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho int32_t getValue() const { return value; } 64b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho 65b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho int32_t compareStringTo(const BytesTrieElement &o, const CharString &strings) const; 66b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho 67b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2clairehoprivate: 68b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho const char *data(const CharString &strings) const { 69b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho int32_t offset=stringOffset; 70b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho if(offset>=0) { 71b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho ++offset; 72b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho } else { 73b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho offset=~offset+2; 74b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho } 75b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho return strings.data()+offset; 76b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho } 77b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho 78b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho // If the stringOffset is non-negative, then the first strings byte contains 79b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho // the string length. 80b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho // If the stringOffset is negative, then the first two strings bytes contain 81b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho // the string length (big-endian), and the offset needs to be bit-inverted. 82b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho // (Compared with a stringLength field here, this saves 3 bytes per string for most strings.) 83b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho int32_t stringOffset; 84b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho int32_t value; 85b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho}; 86b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho 87b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2clairehovoid 88b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2clairehoBytesTrieElement::setTo(const StringPiece &s, int32_t val, 89b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho CharString &strings, UErrorCode &errorCode) { 90b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho if(U_FAILURE(errorCode)) { 91b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho return; 92b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho } 93b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho int32_t length=s.length(); 94b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho if(length>0xffff) { 95b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho // Too long: We store the length in 1 or 2 bytes. 96b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho errorCode=U_INDEX_OUTOFBOUNDS_ERROR; 97b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho return; 98b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho } 99b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho int32_t offset=strings.length(); 100b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho if(length>0xff) { 101b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho offset=~offset; 102b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho strings.append((char)(length>>8), errorCode); 103b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho } 104b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho strings.append((char)length, errorCode); 105b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho stringOffset=offset; 106b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho value=val; 107b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho strings.append(s, errorCode); 108b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho} 109b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho 110b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2clairehoint32_t 111b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2clairehoBytesTrieElement::compareStringTo(const BytesTrieElement &other, const CharString &strings) const { 112b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho // TODO: add StringPiece::compare(), see ticket #8187 113b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho StringPiece thisString=getString(strings); 114b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho StringPiece otherString=other.getString(strings); 115b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho int32_t lengthDiff=thisString.length()-otherString.length(); 116b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho int32_t commonLength; 117b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho if(lengthDiff<=0) { 118b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho commonLength=thisString.length(); 119b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho } else { 120b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho commonLength=otherString.length(); 121b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho } 122b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho int32_t diff=uprv_memcmp(thisString.data(), otherString.data(), commonLength); 123b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho return diff!=0 ? diff : lengthDiff; 124b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho} 125b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho 126b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2clairehoBytesTrieBuilder::BytesTrieBuilder(UErrorCode &errorCode) 127b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho : strings(NULL), elements(NULL), elementsCapacity(0), elementsLength(0), 128b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho bytes(NULL), bytesCapacity(0), bytesLength(0) { 129b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho if(U_FAILURE(errorCode)) { 130b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho return; 131b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho } 132b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho strings=new CharString(); 133b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho if(strings==NULL) { 134b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho errorCode=U_MEMORY_ALLOCATION_ERROR; 135b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho } 136b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho} 137b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho 138b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2clairehoBytesTrieBuilder::~BytesTrieBuilder() { 139b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho delete strings; 140b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho delete[] elements; 141b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho uprv_free(bytes); 142b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho} 143b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho 144b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2clairehoBytesTrieBuilder & 145b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2clairehoBytesTrieBuilder::add(const StringPiece &s, int32_t value, UErrorCode &errorCode) { 146b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho if(U_FAILURE(errorCode)) { 147b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho return *this; 148b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho } 149b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho if(bytesLength>0) { 150b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho // Cannot add elements after building. 151b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho errorCode=U_NO_WRITE_PERMISSION; 152b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho return *this; 153b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho } 154b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho if(elementsLength==elementsCapacity) { 155b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho int32_t newCapacity; 156b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho if(elementsCapacity==0) { 157b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho newCapacity=1024; 158b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho } else { 159b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho newCapacity=4*elementsCapacity; 160b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho } 161b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho BytesTrieElement *newElements=new BytesTrieElement[newCapacity]; 162b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho if(newElements==NULL) { 163b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho errorCode=U_MEMORY_ALLOCATION_ERROR; 164b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho } 165b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho if(elementsLength>0) { 166b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho uprv_memcpy(newElements, elements, elementsLength*sizeof(BytesTrieElement)); 167b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho } 168b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho delete[] elements; 169b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho elements=newElements; 170b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho elementsCapacity=newCapacity; 171b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho } 172b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho elements[elementsLength++].setTo(s, value, *strings, errorCode); 173b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho return *this; 174b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho} 175b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho 176b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2clairehoU_CDECL_BEGIN 177b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho 178b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2clairehostatic int32_t U_CALLCONV 179b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2clairehocompareElementStrings(const void *context, const void *left, const void *right) { 180b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho const CharString *strings=reinterpret_cast<const CharString *>(context); 181b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho const BytesTrieElement *leftElement=reinterpret_cast<const BytesTrieElement *>(left); 182b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho const BytesTrieElement *rightElement=reinterpret_cast<const BytesTrieElement *>(right); 183b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho return leftElement->compareStringTo(*rightElement, *strings); 184b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho} 185b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho 186b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2clairehoU_CDECL_END 187b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho 188b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2clairehoBytesTrie * 189b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2clairehoBytesTrieBuilder::build(UStringTrieBuildOption buildOption, UErrorCode &errorCode) { 190b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho buildBytes(buildOption, errorCode); 191b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho BytesTrie *newTrie=NULL; 192b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho if(U_SUCCESS(errorCode)) { 193b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho newTrie=new BytesTrie(bytes, bytes+(bytesCapacity-bytesLength)); 194b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho if(newTrie==NULL) { 195b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho errorCode=U_MEMORY_ALLOCATION_ERROR; 196b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho } else { 197b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho bytes=NULL; // The new trie now owns the array. 198b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho bytesCapacity=0; 199b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho } 200b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho } 201b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho return newTrie; 202b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho} 203b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho 204b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2clairehoStringPiece 205b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2clairehoBytesTrieBuilder::buildStringPiece(UStringTrieBuildOption buildOption, UErrorCode &errorCode) { 206b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho buildBytes(buildOption, errorCode); 207b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho StringPiece result; 208b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho if(U_SUCCESS(errorCode)) { 209b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho result.set(bytes+(bytesCapacity-bytesLength), bytesLength); 210b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho } 211b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho return result; 212b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho} 213b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho 214b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2clairehovoid 215b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2clairehoBytesTrieBuilder::buildBytes(UStringTrieBuildOption buildOption, UErrorCode &errorCode) { 216b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho if(U_FAILURE(errorCode)) { 217b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho return; 218b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho } 219b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho if(bytes!=NULL && bytesLength>0) { 220b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho // Already built. 221b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho return; 222b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho } 223b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho if(bytesLength==0) { 224b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho if(elementsLength==0) { 225b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho errorCode=U_INDEX_OUTOFBOUNDS_ERROR; 226b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho return; 227b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho } 228b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho uprv_sortArray(elements, elementsLength, (int32_t)sizeof(BytesTrieElement), 229b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho compareElementStrings, strings, 230b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho FALSE, // need not be a stable sort 231b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho &errorCode); 232b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho if(U_FAILURE(errorCode)) { 233b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho return; 234b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho } 235b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho // Duplicate strings are not allowed. 236b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho StringPiece prev=elements[0].getString(*strings); 237b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho for(int32_t i=1; i<elementsLength; ++i) { 238b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho StringPiece current=elements[i].getString(*strings); 239b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho if(prev==current) { 240b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho errorCode=U_ILLEGAL_ARGUMENT_ERROR; 241b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho return; 242b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho } 243b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho prev=current; 244b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho } 245b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho } 246b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho // Create and byte-serialize the trie for the elements. 247b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho bytesLength=0; 248b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho int32_t capacity=strings->length(); 249b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho if(capacity<1024) { 250b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho capacity=1024; 251b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho } 252b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho if(bytesCapacity<capacity) { 253b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho uprv_free(bytes); 254b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho bytes=reinterpret_cast<char *>(uprv_malloc(capacity)); 255b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho if(bytes==NULL) { 256b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho errorCode=U_MEMORY_ALLOCATION_ERROR; 257b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho bytesCapacity=0; 258b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho return; 259b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho } 260b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho bytesCapacity=capacity; 261b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho } 262b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho StringTrieBuilder::build(buildOption, elementsLength, errorCode); 263b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho if(bytes==NULL) { 264b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho errorCode=U_MEMORY_ALLOCATION_ERROR; 265b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho } 266b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho} 267b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho 268b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2clairehoBytesTrieBuilder & 269b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2clairehoBytesTrieBuilder::clear() { 270b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho strings->clear(); 271b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho elementsLength=0; 272b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho bytesLength=0; 273b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho return *this; 274b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho} 275b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho 276b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2clairehoint32_t 277b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2clairehoBytesTrieBuilder::getElementStringLength(int32_t i) const { 278b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho return elements[i].getStringLength(*strings); 279b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho} 280b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho 281b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2clairehoUChar 282b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2clairehoBytesTrieBuilder::getElementUnit(int32_t i, int32_t byteIndex) const { 283b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho return (uint8_t)elements[i].charAt(byteIndex, *strings); 284b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho} 285b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho 286b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2clairehoint32_t 287b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2clairehoBytesTrieBuilder::getElementValue(int32_t i) const { 288b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho return elements[i].getValue(); 289b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho} 290b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho 291b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2clairehoint32_t 292b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2clairehoBytesTrieBuilder::getLimitOfLinearMatch(int32_t first, int32_t last, int32_t byteIndex) const { 293b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho const BytesTrieElement &firstElement=elements[first]; 294b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho const BytesTrieElement &lastElement=elements[last]; 295b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho int32_t minStringLength=firstElement.getStringLength(*strings); 296b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho while(++byteIndex<minStringLength && 297b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho firstElement.charAt(byteIndex, *strings)== 298b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho lastElement.charAt(byteIndex, *strings)) {} 299b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho return byteIndex; 300b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho} 301b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho 302b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2clairehoint32_t 303b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2clairehoBytesTrieBuilder::countElementUnits(int32_t start, int32_t limit, int32_t byteIndex) const { 304b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho int32_t length=0; // Number of different bytes at byteIndex. 305b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho int32_t i=start; 306b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho do { 307b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho char byte=elements[i++].charAt(byteIndex, *strings); 308b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho while(i<limit && byte==elements[i].charAt(byteIndex, *strings)) { 309b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho ++i; 310b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho } 311b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho ++length; 312b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho } while(i<limit); 313b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho return length; 314b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho} 315b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho 316b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2clairehoint32_t 317b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2clairehoBytesTrieBuilder::skipElementsBySomeUnits(int32_t i, int32_t byteIndex, int32_t count) const { 318b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho do { 319b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho char byte=elements[i++].charAt(byteIndex, *strings); 320b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho while(byte==elements[i].charAt(byteIndex, *strings)) { 321b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho ++i; 322b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho } 323b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho } while(--count>0); 324b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho return i; 325b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho} 326b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho 327b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2clairehoint32_t 328b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2clairehoBytesTrieBuilder::indexOfElementWithNextUnit(int32_t i, int32_t byteIndex, UChar byte) const { 329b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho char b=(char)byte; 330b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho while(b==elements[i].charAt(byteIndex, *strings)) { 331b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho ++i; 332b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho } 333b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho return i; 334b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho} 335b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho 336b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2clairehoBytesTrieBuilder::BTLinearMatchNode::BTLinearMatchNode(const char *bytes, int32_t len, Node *nextNode) 337b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho : LinearMatchNode(len, nextNode), s(bytes) { 338b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho hash=hash*37+uhash_hashCharsN(bytes, len); 339b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho} 340b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho 341b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2clairehoUBool 342b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2clairehoBytesTrieBuilder::BTLinearMatchNode::operator==(const Node &other) const { 343b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho if(this==&other) { 344b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho return TRUE; 345b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho } 346b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho if(!LinearMatchNode::operator==(other)) { 347b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho return FALSE; 348b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho } 349b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho const BTLinearMatchNode &o=(const BTLinearMatchNode &)other; 350b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho return 0==uprv_memcmp(s, o.s, length); 351b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho} 352b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho 353b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2clairehovoid 354b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2clairehoBytesTrieBuilder::BTLinearMatchNode::write(StringTrieBuilder &builder) { 355b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho BytesTrieBuilder &b=(BytesTrieBuilder &)builder; 356b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho next->write(builder); 357b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho b.write(s, length); 358b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho offset=b.write(b.getMinLinearMatch()+length-1); 359b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho} 360b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho 361b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2clairehoStringTrieBuilder::Node * 362b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2clairehoBytesTrieBuilder::createLinearMatchNode(int32_t i, int32_t byteIndex, int32_t length, 363b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho Node *nextNode) const { 364b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho return new BTLinearMatchNode( 365b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho elements[i].getString(*strings).data()+byteIndex, 366b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho length, 367b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho nextNode); 368b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho} 369b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho 370b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2clairehoUBool 371b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2clairehoBytesTrieBuilder::ensureCapacity(int32_t length) { 372b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho if(bytes==NULL) { 373b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho return FALSE; // previous memory allocation had failed 374b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho } 375b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho if(length>bytesCapacity) { 376b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho int32_t newCapacity=bytesCapacity; 377b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho do { 378b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho newCapacity*=2; 379b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho } while(newCapacity<=length); 380b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho char *newBytes=reinterpret_cast<char *>(uprv_malloc(newCapacity)); 381b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho if(newBytes==NULL) { 382b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho // unable to allocate memory 383b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho uprv_free(bytes); 384b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho bytes=NULL; 385b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho bytesCapacity=0; 386b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho return FALSE; 387b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho } 388b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho uprv_memcpy(newBytes+(newCapacity-bytesLength), 389b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho bytes+(bytesCapacity-bytesLength), bytesLength); 390b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho uprv_free(bytes); 391b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho bytes=newBytes; 392b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho bytesCapacity=newCapacity; 393b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho } 394b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho return TRUE; 395b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho} 396b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho 397b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2clairehoint32_t 398b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2clairehoBytesTrieBuilder::write(int32_t byte) { 399b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho int32_t newLength=bytesLength+1; 400b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho if(ensureCapacity(newLength)) { 401b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho bytesLength=newLength; 402b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho bytes[bytesCapacity-bytesLength]=(char)byte; 403b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho } 404b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho return bytesLength; 405b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho} 406b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho 407b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2clairehoint32_t 408b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2clairehoBytesTrieBuilder::write(const char *b, int32_t length) { 409b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho int32_t newLength=bytesLength+length; 410b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho if(ensureCapacity(newLength)) { 411b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho bytesLength=newLength; 412b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho uprv_memcpy(bytes+(bytesCapacity-bytesLength), b, length); 413b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho } 414b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho return bytesLength; 415b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho} 416b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho 417b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2clairehoint32_t 418b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2clairehoBytesTrieBuilder::writeElementUnits(int32_t i, int32_t byteIndex, int32_t length) { 419b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho return write(elements[i].getString(*strings).data()+byteIndex, length); 420b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho} 421b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho 422b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2clairehoint32_t 423b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2clairehoBytesTrieBuilder::writeValueAndFinal(int32_t i, UBool isFinal) { 424b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho if(0<=i && i<=BytesTrie::kMaxOneByteValue) { 425b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho return write(((BytesTrie::kMinOneByteValueLead+i)<<1)|isFinal); 426b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho } 427b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho char intBytes[5]; 428b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho int32_t length=1; 429b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho if(i<0 || i>0xffffff) { 430b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho intBytes[0]=(char)BytesTrie::kFiveByteValueLead; 431b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho intBytes[1]=(char)(i>>24); 432b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho intBytes[2]=(char)(i>>16); 433b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho intBytes[3]=(char)(i>>8); 434b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho intBytes[4]=(char)i; 435b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho length=5; 436b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho // } else if(i<=BytesTrie::kMaxOneByteValue) { 437b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho // intBytes[0]=(char)(BytesTrie::kMinOneByteValueLead+i); 438b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho } else { 439b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho if(i<=BytesTrie::kMaxTwoByteValue) { 440b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho intBytes[0]=(char)(BytesTrie::kMinTwoByteValueLead+(i>>8)); 441b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho } else { 442b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho if(i<=BytesTrie::kMaxThreeByteValue) { 443b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho intBytes[0]=(char)(BytesTrie::kMinThreeByteValueLead+(i>>16)); 444b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho } else { 445b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho intBytes[0]=(char)BytesTrie::kFourByteValueLead; 446b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho intBytes[1]=(char)(i>>16); 447b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho length=2; 448b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho } 449b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho intBytes[length++]=(char)(i>>8); 450b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho } 451b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho intBytes[length++]=(char)i; 452b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho } 453b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho intBytes[0]=(char)((intBytes[0]<<1)|isFinal); 454b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho return write(intBytes, length); 455b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho} 456b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho 457b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2clairehoint32_t 458b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2clairehoBytesTrieBuilder::writeValueAndType(UBool hasValue, int32_t value, int32_t node) { 459b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho int32_t offset=write(node); 460b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho if(hasValue) { 461b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho offset=writeValueAndFinal(value, FALSE); 462b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho } 463b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho return offset; 464b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho} 465b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho 466b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2clairehoint32_t 467b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2clairehoBytesTrieBuilder::writeDeltaTo(int32_t jumpTarget) { 468b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho int32_t i=bytesLength-jumpTarget; 469b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho U_ASSERT(i>=0); 470b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho if(i<=BytesTrie::kMaxOneByteDelta) { 471b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho return write(i); 472b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho } 473b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho char intBytes[5]; 474b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho int32_t length; 475b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho if(i<=BytesTrie::kMaxTwoByteDelta) { 476b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho intBytes[0]=(char)(BytesTrie::kMinTwoByteDeltaLead+(i>>8)); 477b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho length=1; 478b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho } else { 479b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho if(i<=BytesTrie::kMaxThreeByteDelta) { 480b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho intBytes[0]=(char)(BytesTrie::kMinThreeByteDeltaLead+(i>>16)); 481b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho length=2; 482b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho } else { 483b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho if(i<=0xffffff) { 484b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho intBytes[0]=(char)BytesTrie::kFourByteDeltaLead; 485b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho length=3; 486b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho } else { 487b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho intBytes[0]=(char)BytesTrie::kFiveByteDeltaLead; 488b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho intBytes[1]=(char)(i>>24); 489b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho length=4; 490b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho } 491b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho intBytes[1]=(char)(i>>16); 492b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho } 493b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho intBytes[1]=(char)(i>>8); 494b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho } 495b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho intBytes[length++]=(char)i; 496b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho return write(intBytes, length); 497b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho} 498b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho 499b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2clairehoU_NAMESPACE_END 500