1b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho/* 2b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho******************************************************************************* 354dcd9b6a06071f647dac967e9e267abb9410720Craig Cornelius* Copyright (C) 2010-2012, International Business Machines 4b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho* Corporation and others. All Rights Reserved. 5b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho******************************************************************************* 6b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho* file name: bytestriebuilder.cpp 7b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho* encoding: US-ASCII 8b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho* tab size: 8 (not used) 9b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho* indentation:4 10b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho* 11b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho* created on: 2010sep25 12b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho* created by: Markus W. Scherer 13b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho*/ 14b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho 15b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho#include "unicode/utypes.h" 16b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho#include "unicode/bytestrie.h" 17b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho#include "unicode/bytestriebuilder.h" 18b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho#include "unicode/stringpiece.h" 19b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho#include "charstr.h" 20b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho#include "cmemory.h" 21b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho#include "uhash.h" 22b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho#include "uarrsort.h" 23b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho#include "uassert.h" 24103e9ffba2cba345d0078eb8b8db33249f81840aCraig Cornelius#include "ustr_imp.h" 25b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho 26b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2clairehoU_NAMESPACE_BEGIN 27b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho 28b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho/* 29b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho * Note: This builder implementation stores (bytes, value) pairs with full copies 30b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho * of the byte sequences, until the BytesTrie is built. 31b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho * It might(!) take less memory if we collected the data in a temporary, dynamic trie. 32b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho */ 33b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho 34b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2clairehoclass BytesTrieElement : public UMemory { 35b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2clairehopublic: 36b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho // Use compiler's default constructor, initializes nothing. 37b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho 38b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho void setTo(const StringPiece &s, int32_t val, CharString &strings, UErrorCode &errorCode); 39b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho 40b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho StringPiece getString(const CharString &strings) const { 41b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho int32_t offset=stringOffset; 42b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho int32_t length; 43b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho if(offset>=0) { 44b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho length=(uint8_t)strings[offset++]; 45b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho } else { 46b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho offset=~offset; 47b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho length=((int32_t)(uint8_t)strings[offset]<<8)|(uint8_t)strings[offset+1]; 48b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho offset+=2; 49b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho } 50b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho return StringPiece(strings.data()+offset, length); 51b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho } 52b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho int32_t getStringLength(const CharString &strings) const { 53b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho int32_t offset=stringOffset; 54b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho if(offset>=0) { 55b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho return (uint8_t)strings[offset]; 56b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho } else { 57b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho offset=~offset; 58b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho return ((int32_t)(uint8_t)strings[offset]<<8)|(uint8_t)strings[offset+1]; 59b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho } 60b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho } 61b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho 62b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho char charAt(int32_t index, const CharString &strings) const { return data(strings)[index]; } 63b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho 64b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho int32_t getValue() const { return value; } 65b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho 66b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho int32_t compareStringTo(const BytesTrieElement &o, const CharString &strings) const; 67b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho 68b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2clairehoprivate: 69b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho const char *data(const CharString &strings) const { 70b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho int32_t offset=stringOffset; 71b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho if(offset>=0) { 72b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho ++offset; 73b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho } else { 74b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho offset=~offset+2; 75b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho } 76b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho return strings.data()+offset; 77b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho } 78b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho 79b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho // If the stringOffset is non-negative, then the first strings byte contains 80b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho // the string length. 81b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho // If the stringOffset is negative, then the first two strings bytes contain 82b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho // the string length (big-endian), and the offset needs to be bit-inverted. 83b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho // (Compared with a stringLength field here, this saves 3 bytes per string for most strings.) 84b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho int32_t stringOffset; 85b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho int32_t value; 86b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho}; 87b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho 88b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2clairehovoid 89b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2clairehoBytesTrieElement::setTo(const StringPiece &s, int32_t val, 90b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho CharString &strings, UErrorCode &errorCode) { 91b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho if(U_FAILURE(errorCode)) { 92b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho return; 93b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho } 94b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho int32_t length=s.length(); 95b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho if(length>0xffff) { 96b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho // Too long: We store the length in 1 or 2 bytes. 97b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho errorCode=U_INDEX_OUTOFBOUNDS_ERROR; 98b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho return; 99b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho } 100b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho int32_t offset=strings.length(); 101b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho if(length>0xff) { 102b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho offset=~offset; 103b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho strings.append((char)(length>>8), errorCode); 104b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho } 105b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho strings.append((char)length, errorCode); 106b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho stringOffset=offset; 107b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho value=val; 108b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho strings.append(s, errorCode); 109b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho} 110b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho 111b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2clairehoint32_t 112b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2clairehoBytesTrieElement::compareStringTo(const BytesTrieElement &other, const CharString &strings) const { 113b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho // TODO: add StringPiece::compare(), see ticket #8187 114b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho StringPiece thisString=getString(strings); 115b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho StringPiece otherString=other.getString(strings); 116b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho int32_t lengthDiff=thisString.length()-otherString.length(); 117b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho int32_t commonLength; 118b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho if(lengthDiff<=0) { 119b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho commonLength=thisString.length(); 120b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho } else { 121b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho commonLength=otherString.length(); 122b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho } 123b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho int32_t diff=uprv_memcmp(thisString.data(), otherString.data(), commonLength); 124b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho return diff!=0 ? diff : lengthDiff; 125b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho} 126b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho 127b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2clairehoBytesTrieBuilder::BytesTrieBuilder(UErrorCode &errorCode) 128b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho : strings(NULL), elements(NULL), elementsCapacity(0), elementsLength(0), 129b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho bytes(NULL), bytesCapacity(0), bytesLength(0) { 130b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho if(U_FAILURE(errorCode)) { 131b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho return; 132b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho } 133b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho strings=new CharString(); 134b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho if(strings==NULL) { 135b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho errorCode=U_MEMORY_ALLOCATION_ERROR; 136b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho } 137b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho} 138b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho 139b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2clairehoBytesTrieBuilder::~BytesTrieBuilder() { 140b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho delete strings; 141b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho delete[] elements; 142b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho uprv_free(bytes); 143b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho} 144b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho 145b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2clairehoBytesTrieBuilder & 146b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2clairehoBytesTrieBuilder::add(const StringPiece &s, int32_t value, UErrorCode &errorCode) { 147b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho if(U_FAILURE(errorCode)) { 148b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho return *this; 149b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho } 150b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho if(bytesLength>0) { 151b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho // Cannot add elements after building. 152b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho errorCode=U_NO_WRITE_PERMISSION; 153b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho return *this; 154b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho } 155b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho if(elementsLength==elementsCapacity) { 156b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho int32_t newCapacity; 157b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho if(elementsCapacity==0) { 158b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho newCapacity=1024; 159b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho } else { 160b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho newCapacity=4*elementsCapacity; 161b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho } 162b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho BytesTrieElement *newElements=new BytesTrieElement[newCapacity]; 163b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho if(newElements==NULL) { 164b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho errorCode=U_MEMORY_ALLOCATION_ERROR; 165103e9ffba2cba345d0078eb8b8db33249f81840aCraig Cornelius return *this; // error instead of dereferencing null 166b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho } 167b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho if(elementsLength>0) { 168b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho uprv_memcpy(newElements, elements, elementsLength*sizeof(BytesTrieElement)); 169b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho } 170b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho delete[] elements; 171b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho elements=newElements; 172b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho elementsCapacity=newCapacity; 173b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho } 174b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho elements[elementsLength++].setTo(s, value, *strings, errorCode); 175b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho return *this; 176b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho} 177b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho 178b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2clairehoU_CDECL_BEGIN 179b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho 180b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2clairehostatic int32_t U_CALLCONV 181b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2clairehocompareElementStrings(const void *context, const void *left, const void *right) { 18254dcd9b6a06071f647dac967e9e267abb9410720Craig Cornelius const CharString *strings=static_cast<const CharString *>(context); 18354dcd9b6a06071f647dac967e9e267abb9410720Craig Cornelius const BytesTrieElement *leftElement=static_cast<const BytesTrieElement *>(left); 18454dcd9b6a06071f647dac967e9e267abb9410720Craig Cornelius const BytesTrieElement *rightElement=static_cast<const BytesTrieElement *>(right); 185b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho return leftElement->compareStringTo(*rightElement, *strings); 186b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho} 187b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho 188b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2clairehoU_CDECL_END 189b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho 190b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2clairehoBytesTrie * 191b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2clairehoBytesTrieBuilder::build(UStringTrieBuildOption buildOption, UErrorCode &errorCode) { 192b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho buildBytes(buildOption, errorCode); 193b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho BytesTrie *newTrie=NULL; 194b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho if(U_SUCCESS(errorCode)) { 195b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho newTrie=new BytesTrie(bytes, bytes+(bytesCapacity-bytesLength)); 196b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho if(newTrie==NULL) { 197b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho errorCode=U_MEMORY_ALLOCATION_ERROR; 198b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho } else { 199b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho bytes=NULL; // The new trie now owns the array. 200b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho bytesCapacity=0; 201b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho } 202b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho } 203b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho return newTrie; 204b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho} 205b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho 206b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2clairehoStringPiece 207b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2clairehoBytesTrieBuilder::buildStringPiece(UStringTrieBuildOption buildOption, UErrorCode &errorCode) { 208b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho buildBytes(buildOption, errorCode); 209b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho StringPiece result; 210b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho if(U_SUCCESS(errorCode)) { 211b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho result.set(bytes+(bytesCapacity-bytesLength), bytesLength); 212b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho } 213b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho return result; 214b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho} 215b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho 216b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2clairehovoid 217b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2clairehoBytesTrieBuilder::buildBytes(UStringTrieBuildOption buildOption, UErrorCode &errorCode) { 218b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho if(U_FAILURE(errorCode)) { 219b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho return; 220b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho } 221b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho if(bytes!=NULL && bytesLength>0) { 222b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho // Already built. 223b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho return; 224b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho } 225b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho if(bytesLength==0) { 226b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho if(elementsLength==0) { 227b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho errorCode=U_INDEX_OUTOFBOUNDS_ERROR; 228b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho return; 229b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho } 230b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho uprv_sortArray(elements, elementsLength, (int32_t)sizeof(BytesTrieElement), 231b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho compareElementStrings, strings, 232b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho FALSE, // need not be a stable sort 233b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho &errorCode); 234b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho if(U_FAILURE(errorCode)) { 235b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho return; 236b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho } 237b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho // Duplicate strings are not allowed. 238b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho StringPiece prev=elements[0].getString(*strings); 239b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho for(int32_t i=1; i<elementsLength; ++i) { 240b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho StringPiece current=elements[i].getString(*strings); 241b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho if(prev==current) { 242b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho errorCode=U_ILLEGAL_ARGUMENT_ERROR; 243b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho return; 244b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho } 245b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho prev=current; 246b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho } 247b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho } 248b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho // Create and byte-serialize the trie for the elements. 249b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho bytesLength=0; 250b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho int32_t capacity=strings->length(); 251b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho if(capacity<1024) { 252b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho capacity=1024; 253b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho } 254b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho if(bytesCapacity<capacity) { 255b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho uprv_free(bytes); 25654dcd9b6a06071f647dac967e9e267abb9410720Craig Cornelius bytes=static_cast<char *>(uprv_malloc(capacity)); 257b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho if(bytes==NULL) { 258b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho errorCode=U_MEMORY_ALLOCATION_ERROR; 259b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho bytesCapacity=0; 260b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho return; 261b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho } 262b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho bytesCapacity=capacity; 263b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho } 264b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho StringTrieBuilder::build(buildOption, elementsLength, errorCode); 265b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho if(bytes==NULL) { 266b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho errorCode=U_MEMORY_ALLOCATION_ERROR; 267b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho } 268b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho} 269b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho 270b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2clairehoBytesTrieBuilder & 271b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2clairehoBytesTrieBuilder::clear() { 272b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho strings->clear(); 273b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho elementsLength=0; 274b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho bytesLength=0; 275b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho return *this; 276b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho} 277b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho 278b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2clairehoint32_t 279b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2clairehoBytesTrieBuilder::getElementStringLength(int32_t i) const { 280b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho return elements[i].getStringLength(*strings); 281b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho} 282b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho 283b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2clairehoUChar 284b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2clairehoBytesTrieBuilder::getElementUnit(int32_t i, int32_t byteIndex) const { 285b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho return (uint8_t)elements[i].charAt(byteIndex, *strings); 286b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho} 287b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho 288b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2clairehoint32_t 289b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2clairehoBytesTrieBuilder::getElementValue(int32_t i) const { 290b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho return elements[i].getValue(); 291b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho} 292b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho 293b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2clairehoint32_t 294b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2clairehoBytesTrieBuilder::getLimitOfLinearMatch(int32_t first, int32_t last, int32_t byteIndex) const { 295b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho const BytesTrieElement &firstElement=elements[first]; 296b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho const BytesTrieElement &lastElement=elements[last]; 297b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho int32_t minStringLength=firstElement.getStringLength(*strings); 298b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho while(++byteIndex<minStringLength && 299b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho firstElement.charAt(byteIndex, *strings)== 300b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho lastElement.charAt(byteIndex, *strings)) {} 301b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho return byteIndex; 302b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho} 303b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho 304b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2clairehoint32_t 305b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2clairehoBytesTrieBuilder::countElementUnits(int32_t start, int32_t limit, int32_t byteIndex) const { 306b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho int32_t length=0; // Number of different bytes at byteIndex. 307b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho int32_t i=start; 308b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho do { 309b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho char byte=elements[i++].charAt(byteIndex, *strings); 310b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho while(i<limit && byte==elements[i].charAt(byteIndex, *strings)) { 311b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho ++i; 312b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho } 313b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho ++length; 314b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho } while(i<limit); 315b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho return length; 316b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho} 317b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho 318b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2clairehoint32_t 319b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2clairehoBytesTrieBuilder::skipElementsBySomeUnits(int32_t i, int32_t byteIndex, int32_t count) const { 320b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho do { 321b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho char byte=elements[i++].charAt(byteIndex, *strings); 322b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho while(byte==elements[i].charAt(byteIndex, *strings)) { 323b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho ++i; 324b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho } 325b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho } while(--count>0); 326b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho return i; 327b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho} 328b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho 329b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2clairehoint32_t 330b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2clairehoBytesTrieBuilder::indexOfElementWithNextUnit(int32_t i, int32_t byteIndex, UChar byte) const { 331b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho char b=(char)byte; 332b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho while(b==elements[i].charAt(byteIndex, *strings)) { 333b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho ++i; 334b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho } 335b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho return i; 336b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho} 337b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho 338b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2clairehoBytesTrieBuilder::BTLinearMatchNode::BTLinearMatchNode(const char *bytes, int32_t len, Node *nextNode) 339b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho : LinearMatchNode(len, nextNode), s(bytes) { 340103e9ffba2cba345d0078eb8b8db33249f81840aCraig Cornelius hash=hash*37+ustr_hashCharsN(bytes, len); 341b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho} 342b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho 343b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2clairehoUBool 344b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2clairehoBytesTrieBuilder::BTLinearMatchNode::operator==(const Node &other) const { 345b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho if(this==&other) { 346b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho return TRUE; 347b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho } 348b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho if(!LinearMatchNode::operator==(other)) { 349b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho return FALSE; 350b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho } 351b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho const BTLinearMatchNode &o=(const BTLinearMatchNode &)other; 352b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho return 0==uprv_memcmp(s, o.s, length); 353b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho} 354b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho 355b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2clairehovoid 356b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2clairehoBytesTrieBuilder::BTLinearMatchNode::write(StringTrieBuilder &builder) { 357b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho BytesTrieBuilder &b=(BytesTrieBuilder &)builder; 358b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho next->write(builder); 359b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho b.write(s, length); 360b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho offset=b.write(b.getMinLinearMatch()+length-1); 361b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho} 362b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho 363b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2clairehoStringTrieBuilder::Node * 364b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2clairehoBytesTrieBuilder::createLinearMatchNode(int32_t i, int32_t byteIndex, int32_t length, 365b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho Node *nextNode) const { 366b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho return new BTLinearMatchNode( 367b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho elements[i].getString(*strings).data()+byteIndex, 368b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho length, 369b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho nextNode); 370b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho} 371b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho 372b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2clairehoUBool 373b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2clairehoBytesTrieBuilder::ensureCapacity(int32_t length) { 374b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho if(bytes==NULL) { 375b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho return FALSE; // previous memory allocation had failed 376b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho } 377b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho if(length>bytesCapacity) { 378b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho int32_t newCapacity=bytesCapacity; 379b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho do { 380b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho newCapacity*=2; 381b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho } while(newCapacity<=length); 38254dcd9b6a06071f647dac967e9e267abb9410720Craig Cornelius char *newBytes=static_cast<char *>(uprv_malloc(newCapacity)); 383b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho if(newBytes==NULL) { 384b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho // unable to allocate memory 385b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho uprv_free(bytes); 386b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho bytes=NULL; 387b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho bytesCapacity=0; 388b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho return FALSE; 389b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho } 390b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho uprv_memcpy(newBytes+(newCapacity-bytesLength), 391b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho bytes+(bytesCapacity-bytesLength), bytesLength); 392b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho uprv_free(bytes); 393b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho bytes=newBytes; 394b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho bytesCapacity=newCapacity; 395b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho } 396b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho return TRUE; 397b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho} 398b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho 399b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2clairehoint32_t 400b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2clairehoBytesTrieBuilder::write(int32_t byte) { 401b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho int32_t newLength=bytesLength+1; 402b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho if(ensureCapacity(newLength)) { 403b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho bytesLength=newLength; 404b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho bytes[bytesCapacity-bytesLength]=(char)byte; 405b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho } 406b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho return bytesLength; 407b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho} 408b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho 409b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2clairehoint32_t 410b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2clairehoBytesTrieBuilder::write(const char *b, int32_t length) { 411b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho int32_t newLength=bytesLength+length; 412b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho if(ensureCapacity(newLength)) { 413b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho bytesLength=newLength; 414b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho uprv_memcpy(bytes+(bytesCapacity-bytesLength), b, length); 415b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho } 416b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho return bytesLength; 417b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho} 418b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho 419b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2clairehoint32_t 420b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2clairehoBytesTrieBuilder::writeElementUnits(int32_t i, int32_t byteIndex, int32_t length) { 421b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho return write(elements[i].getString(*strings).data()+byteIndex, length); 422b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho} 423b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho 424b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2clairehoint32_t 425b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2clairehoBytesTrieBuilder::writeValueAndFinal(int32_t i, UBool isFinal) { 426b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho if(0<=i && i<=BytesTrie::kMaxOneByteValue) { 427b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho return write(((BytesTrie::kMinOneByteValueLead+i)<<1)|isFinal); 428b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho } 429b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho char intBytes[5]; 430b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho int32_t length=1; 431b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho if(i<0 || i>0xffffff) { 432b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho intBytes[0]=(char)BytesTrie::kFiveByteValueLead; 43354dcd9b6a06071f647dac967e9e267abb9410720Craig Cornelius intBytes[1]=(char)((uint32_t)i>>24); 43454dcd9b6a06071f647dac967e9e267abb9410720Craig Cornelius intBytes[2]=(char)((uint32_t)i>>16); 43554dcd9b6a06071f647dac967e9e267abb9410720Craig Cornelius intBytes[3]=(char)((uint32_t)i>>8); 436b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho intBytes[4]=(char)i; 437b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho length=5; 438b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho // } else if(i<=BytesTrie::kMaxOneByteValue) { 439b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho // intBytes[0]=(char)(BytesTrie::kMinOneByteValueLead+i); 440b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho } else { 441b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho if(i<=BytesTrie::kMaxTwoByteValue) { 442b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho intBytes[0]=(char)(BytesTrie::kMinTwoByteValueLead+(i>>8)); 443b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho } else { 444b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho if(i<=BytesTrie::kMaxThreeByteValue) { 445b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho intBytes[0]=(char)(BytesTrie::kMinThreeByteValueLead+(i>>16)); 446b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho } else { 447b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho intBytes[0]=(char)BytesTrie::kFourByteValueLead; 448b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho intBytes[1]=(char)(i>>16); 449b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho length=2; 450b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho } 451b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho intBytes[length++]=(char)(i>>8); 452b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho } 453b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho intBytes[length++]=(char)i; 454b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho } 455b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho intBytes[0]=(char)((intBytes[0]<<1)|isFinal); 456b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho return write(intBytes, length); 457b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho} 458b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho 459b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2clairehoint32_t 460b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2clairehoBytesTrieBuilder::writeValueAndType(UBool hasValue, int32_t value, int32_t node) { 461b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho int32_t offset=write(node); 462b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho if(hasValue) { 463b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho offset=writeValueAndFinal(value, FALSE); 464b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho } 465b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho return offset; 466b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho} 467b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho 468b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2clairehoint32_t 469b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2clairehoBytesTrieBuilder::writeDeltaTo(int32_t jumpTarget) { 470b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho int32_t i=bytesLength-jumpTarget; 471b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho U_ASSERT(i>=0); 472b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho if(i<=BytesTrie::kMaxOneByteDelta) { 473b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho return write(i); 474b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho } 475b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho char intBytes[5]; 476b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho int32_t length; 477b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho if(i<=BytesTrie::kMaxTwoByteDelta) { 478b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho intBytes[0]=(char)(BytesTrie::kMinTwoByteDeltaLead+(i>>8)); 479b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho length=1; 480b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho } else { 481b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho if(i<=BytesTrie::kMaxThreeByteDelta) { 482b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho intBytes[0]=(char)(BytesTrie::kMinThreeByteDeltaLead+(i>>16)); 483b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho length=2; 484b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho } else { 485b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho if(i<=0xffffff) { 486b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho intBytes[0]=(char)BytesTrie::kFourByteDeltaLead; 487b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho length=3; 488b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho } else { 489b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho intBytes[0]=(char)BytesTrie::kFiveByteDeltaLead; 490b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho intBytes[1]=(char)(i>>24); 491b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho length=4; 492b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho } 493b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho intBytes[1]=(char)(i>>16); 494b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho } 495b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho intBytes[1]=(char)(i>>8); 496b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho } 497b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho intBytes[length++]=(char)i; 498b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho return write(intBytes, length); 499b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho} 500b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho 501b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2clairehoU_NAMESPACE_END 502