16f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org/* 26f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org******************************************************************************* 36f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org* Copyright (C) 2010-2012, International Business Machines 46f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org* Corporation and others. All Rights Reserved. 56f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org******************************************************************************* 66f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org* file name: bytestriebuilder.cpp 76f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org* encoding: US-ASCII 86f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org* tab size: 8 (not used) 96f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org* indentation:4 106f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org* 116f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org* created on: 2010sep25 126f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org* created by: Markus W. Scherer 136f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org*/ 146f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org 156f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org#include "unicode/utypes.h" 166f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org#include "unicode/bytestrie.h" 176f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org#include "unicode/bytestriebuilder.h" 186f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org#include "unicode/stringpiece.h" 196f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org#include "charstr.h" 206f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org#include "cmemory.h" 216f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org#include "uhash.h" 226f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org#include "uarrsort.h" 236f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org#include "uassert.h" 246f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org#include "ustr_imp.h" 256f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org 266f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.orgU_NAMESPACE_BEGIN 276f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org 286f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org/* 296f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org * Note: This builder implementation stores (bytes, value) pairs with full copies 306f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org * of the byte sequences, until the BytesTrie is built. 316f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org * It might(!) take less memory if we collected the data in a temporary, dynamic trie. 326f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org */ 336f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org 346f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.orgclass BytesTrieElement : public UMemory { 356f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.orgpublic: 366f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org // Use compiler's default constructor, initializes nothing. 376f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org 386f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org void setTo(const StringPiece &s, int32_t val, CharString &strings, UErrorCode &errorCode); 396f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org 406f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org StringPiece getString(const CharString &strings) const { 416f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org int32_t offset=stringOffset; 426f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org int32_t length; 436f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org if(offset>=0) { 446f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org length=(uint8_t)strings[offset++]; 456f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org } else { 466f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org offset=~offset; 476f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org length=((int32_t)(uint8_t)strings[offset]<<8)|(uint8_t)strings[offset+1]; 486f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org offset+=2; 496f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org } 506f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org return StringPiece(strings.data()+offset, length); 516f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org } 526f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org int32_t getStringLength(const CharString &strings) const { 536f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org int32_t offset=stringOffset; 546f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org if(offset>=0) { 556f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org return (uint8_t)strings[offset]; 566f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org } else { 576f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org offset=~offset; 586f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org return ((int32_t)(uint8_t)strings[offset]<<8)|(uint8_t)strings[offset+1]; 596f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org } 606f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org } 616f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org 626f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org char charAt(int32_t index, const CharString &strings) const { return data(strings)[index]; } 636f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org 646f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org int32_t getValue() const { return value; } 656f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org 666f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org int32_t compareStringTo(const BytesTrieElement &o, const CharString &strings) const; 676f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org 686f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.orgprivate: 696f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org const char *data(const CharString &strings) const { 706f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org int32_t offset=stringOffset; 716f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org if(offset>=0) { 726f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org ++offset; 736f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org } else { 746f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org offset=~offset+2; 756f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org } 766f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org return strings.data()+offset; 776f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org } 786f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org 796f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org // If the stringOffset is non-negative, then the first strings byte contains 806f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org // the string length. 816f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org // If the stringOffset is negative, then the first two strings bytes contain 826f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org // the string length (big-endian), and the offset needs to be bit-inverted. 836f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org // (Compared with a stringLength field here, this saves 3 bytes per string for most strings.) 846f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org int32_t stringOffset; 856f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org int32_t value; 866f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org}; 876f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org 886f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.orgvoid 896f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.orgBytesTrieElement::setTo(const StringPiece &s, int32_t val, 906f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org CharString &strings, UErrorCode &errorCode) { 916f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org if(U_FAILURE(errorCode)) { 926f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org return; 936f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org } 946f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org int32_t length=s.length(); 956f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org if(length>0xffff) { 966f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org // Too long: We store the length in 1 or 2 bytes. 976f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org errorCode=U_INDEX_OUTOFBOUNDS_ERROR; 986f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org return; 996f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org } 1006f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org int32_t offset=strings.length(); 1016f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org if(length>0xff) { 1026f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org offset=~offset; 1036f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org strings.append((char)(length>>8), errorCode); 1046f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org } 1056f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org strings.append((char)length, errorCode); 1066f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org stringOffset=offset; 1076f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org value=val; 1086f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org strings.append(s, errorCode); 1096f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org} 1106f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org 1116f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.orgint32_t 1126f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.orgBytesTrieElement::compareStringTo(const BytesTrieElement &other, const CharString &strings) const { 1136f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org // TODO: add StringPiece::compare(), see ticket #8187 1146f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org StringPiece thisString=getString(strings); 1156f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org StringPiece otherString=other.getString(strings); 1166f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org int32_t lengthDiff=thisString.length()-otherString.length(); 1176f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org int32_t commonLength; 1186f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org if(lengthDiff<=0) { 1196f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org commonLength=thisString.length(); 1206f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org } else { 1216f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org commonLength=otherString.length(); 1226f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org } 1236f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org int32_t diff=uprv_memcmp(thisString.data(), otherString.data(), commonLength); 1246f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org return diff!=0 ? diff : lengthDiff; 1256f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org} 1266f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org 1276f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.orgBytesTrieBuilder::BytesTrieBuilder(UErrorCode &errorCode) 1286f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org : strings(NULL), elements(NULL), elementsCapacity(0), elementsLength(0), 1296f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org bytes(NULL), bytesCapacity(0), bytesLength(0) { 1306f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org if(U_FAILURE(errorCode)) { 1316f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org return; 1326f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org } 1336f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org strings=new CharString(); 1346f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org if(strings==NULL) { 1356f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org errorCode=U_MEMORY_ALLOCATION_ERROR; 1366f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org } 1376f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org} 1386f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org 1396f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.orgBytesTrieBuilder::~BytesTrieBuilder() { 1406f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org delete strings; 1416f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org delete[] elements; 1426f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org uprv_free(bytes); 1436f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org} 1446f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org 1456f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.orgBytesTrieBuilder & 1466f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.orgBytesTrieBuilder::add(const StringPiece &s, int32_t value, UErrorCode &errorCode) { 1476f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org if(U_FAILURE(errorCode)) { 1486f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org return *this; 1496f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org } 1506f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org if(bytesLength>0) { 1516f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org // Cannot add elements after building. 1526f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org errorCode=U_NO_WRITE_PERMISSION; 1536f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org return *this; 1546f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org } 1556f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org if(elementsLength==elementsCapacity) { 1566f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org int32_t newCapacity; 1576f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org if(elementsCapacity==0) { 1586f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org newCapacity=1024; 1596f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org } else { 1606f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org newCapacity=4*elementsCapacity; 1616f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org } 1626f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org BytesTrieElement *newElements=new BytesTrieElement[newCapacity]; 1636f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org if(newElements==NULL) { 1646f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org errorCode=U_MEMORY_ALLOCATION_ERROR; 1656f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org return *this; // error instead of dereferencing null 1666f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org } 1676f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org if(elementsLength>0) { 1686f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org uprv_memcpy(newElements, elements, elementsLength*sizeof(BytesTrieElement)); 1696f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org } 1706f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org delete[] elements; 1716f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org elements=newElements; 1726f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org elementsCapacity=newCapacity; 1736f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org } 1746f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org elements[elementsLength++].setTo(s, value, *strings, errorCode); 1756f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org return *this; 1766f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org} 1776f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org 1786f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.orgU_CDECL_BEGIN 1796f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org 1806f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.orgstatic int32_t U_CALLCONV 1816f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.orgcompareElementStrings(const void *context, const void *left, const void *right) { 1826f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org const CharString *strings=static_cast<const CharString *>(context); 1836f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org const BytesTrieElement *leftElement=static_cast<const BytesTrieElement *>(left); 1846f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org const BytesTrieElement *rightElement=static_cast<const BytesTrieElement *>(right); 1856f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org return leftElement->compareStringTo(*rightElement, *strings); 1866f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org} 1876f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org 1886f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.orgU_CDECL_END 1896f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org 1906f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.orgBytesTrie * 1916f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.orgBytesTrieBuilder::build(UStringTrieBuildOption buildOption, UErrorCode &errorCode) { 1926f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org buildBytes(buildOption, errorCode); 1936f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org BytesTrie *newTrie=NULL; 1946f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org if(U_SUCCESS(errorCode)) { 1956f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org newTrie=new BytesTrie(bytes, bytes+(bytesCapacity-bytesLength)); 1966f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org if(newTrie==NULL) { 1976f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org errorCode=U_MEMORY_ALLOCATION_ERROR; 1986f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org } else { 1996f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org bytes=NULL; // The new trie now owns the array. 2006f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org bytesCapacity=0; 2016f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org } 2026f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org } 2036f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org return newTrie; 2046f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org} 2056f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org 2066f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.orgStringPiece 2076f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.orgBytesTrieBuilder::buildStringPiece(UStringTrieBuildOption buildOption, UErrorCode &errorCode) { 2086f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org buildBytes(buildOption, errorCode); 2096f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org StringPiece result; 2106f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org if(U_SUCCESS(errorCode)) { 2116f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org result.set(bytes+(bytesCapacity-bytesLength), bytesLength); 2126f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org } 2136f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org return result; 2146f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org} 2156f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org 2166f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.orgvoid 2176f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.orgBytesTrieBuilder::buildBytes(UStringTrieBuildOption buildOption, UErrorCode &errorCode) { 2186f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org if(U_FAILURE(errorCode)) { 2196f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org return; 2206f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org } 2216f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org if(bytes!=NULL && bytesLength>0) { 2226f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org // Already built. 2236f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org return; 2246f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org } 2256f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org if(bytesLength==0) { 2266f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org if(elementsLength==0) { 2276f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org errorCode=U_INDEX_OUTOFBOUNDS_ERROR; 2286f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org return; 2296f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org } 2306f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org uprv_sortArray(elements, elementsLength, (int32_t)sizeof(BytesTrieElement), 2316f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org compareElementStrings, strings, 2326f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org FALSE, // need not be a stable sort 2336f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org &errorCode); 2346f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org if(U_FAILURE(errorCode)) { 2356f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org return; 2366f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org } 2376f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org // Duplicate strings are not allowed. 2386f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org StringPiece prev=elements[0].getString(*strings); 2396f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org for(int32_t i=1; i<elementsLength; ++i) { 2406f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org StringPiece current=elements[i].getString(*strings); 2416f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org if(prev==current) { 2426f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org errorCode=U_ILLEGAL_ARGUMENT_ERROR; 2436f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org return; 2446f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org } 2456f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org prev=current; 2466f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org } 2476f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org } 2486f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org // Create and byte-serialize the trie for the elements. 2496f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org bytesLength=0; 2506f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org int32_t capacity=strings->length(); 2516f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org if(capacity<1024) { 2526f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org capacity=1024; 2536f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org } 2546f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org if(bytesCapacity<capacity) { 2556f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org uprv_free(bytes); 2566f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org bytes=static_cast<char *>(uprv_malloc(capacity)); 2576f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org if(bytes==NULL) { 2586f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org errorCode=U_MEMORY_ALLOCATION_ERROR; 2596f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org bytesCapacity=0; 2606f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org return; 2616f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org } 2626f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org bytesCapacity=capacity; 2636f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org } 2646f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org StringTrieBuilder::build(buildOption, elementsLength, errorCode); 2656f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org if(bytes==NULL) { 2666f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org errorCode=U_MEMORY_ALLOCATION_ERROR; 2676f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org } 2686f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org} 2696f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org 2706f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.orgBytesTrieBuilder & 2716f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.orgBytesTrieBuilder::clear() { 2726f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org strings->clear(); 2736f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org elementsLength=0; 2746f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org bytesLength=0; 2756f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org return *this; 2766f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org} 2776f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org 2786f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.orgint32_t 2796f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.orgBytesTrieBuilder::getElementStringLength(int32_t i) const { 2806f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org return elements[i].getStringLength(*strings); 2816f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org} 2826f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org 2836f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.orgUChar 2846f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.orgBytesTrieBuilder::getElementUnit(int32_t i, int32_t byteIndex) const { 2856f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org return (uint8_t)elements[i].charAt(byteIndex, *strings); 2866f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org} 2876f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org 2886f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.orgint32_t 2896f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.orgBytesTrieBuilder::getElementValue(int32_t i) const { 2906f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org return elements[i].getValue(); 2916f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org} 2926f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org 2936f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.orgint32_t 2946f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.orgBytesTrieBuilder::getLimitOfLinearMatch(int32_t first, int32_t last, int32_t byteIndex) const { 2956f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org const BytesTrieElement &firstElement=elements[first]; 2966f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org const BytesTrieElement &lastElement=elements[last]; 2976f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org int32_t minStringLength=firstElement.getStringLength(*strings); 2986f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org while(++byteIndex<minStringLength && 2996f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org firstElement.charAt(byteIndex, *strings)== 3006f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org lastElement.charAt(byteIndex, *strings)) {} 3016f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org return byteIndex; 3026f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org} 3036f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org 3046f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.orgint32_t 3056f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.orgBytesTrieBuilder::countElementUnits(int32_t start, int32_t limit, int32_t byteIndex) const { 3066f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org int32_t length=0; // Number of different bytes at byteIndex. 3076f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org int32_t i=start; 3086f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org do { 3096f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org char byte=elements[i++].charAt(byteIndex, *strings); 3106f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org while(i<limit && byte==elements[i].charAt(byteIndex, *strings)) { 3116f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org ++i; 3126f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org } 3136f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org ++length; 3146f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org } while(i<limit); 3156f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org return length; 3166f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org} 3176f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org 3186f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.orgint32_t 3196f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.orgBytesTrieBuilder::skipElementsBySomeUnits(int32_t i, int32_t byteIndex, int32_t count) const { 3206f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org do { 3216f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org char byte=elements[i++].charAt(byteIndex, *strings); 3226f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org while(byte==elements[i].charAt(byteIndex, *strings)) { 3236f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org ++i; 3246f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org } 3256f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org } while(--count>0); 3266f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org return i; 3276f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org} 3286f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org 3296f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.orgint32_t 3306f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.orgBytesTrieBuilder::indexOfElementWithNextUnit(int32_t i, int32_t byteIndex, UChar byte) const { 3316f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org char b=(char)byte; 3326f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org while(b==elements[i].charAt(byteIndex, *strings)) { 3336f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org ++i; 3346f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org } 3356f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org return i; 3366f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org} 3376f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org 3386f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.orgBytesTrieBuilder::BTLinearMatchNode::BTLinearMatchNode(const char *bytes, int32_t len, Node *nextNode) 3396f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org : LinearMatchNode(len, nextNode), s(bytes) { 3406f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org hash=hash*37+ustr_hashCharsN(bytes, len); 3416f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org} 3426f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org 3436f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.orgUBool 3446f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.orgBytesTrieBuilder::BTLinearMatchNode::operator==(const Node &other) const { 3456f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org if(this==&other) { 3466f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org return TRUE; 3476f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org } 3486f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org if(!LinearMatchNode::operator==(other)) { 3496f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org return FALSE; 3506f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org } 3516f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org const BTLinearMatchNode &o=(const BTLinearMatchNode &)other; 3526f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org return 0==uprv_memcmp(s, o.s, length); 3536f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org} 3546f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org 3556f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.orgvoid 3566f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.orgBytesTrieBuilder::BTLinearMatchNode::write(StringTrieBuilder &builder) { 3576f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org BytesTrieBuilder &b=(BytesTrieBuilder &)builder; 3586f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org next->write(builder); 3596f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org b.write(s, length); 3606f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org offset=b.write(b.getMinLinearMatch()+length-1); 3616f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org} 3626f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org 3636f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.orgStringTrieBuilder::Node * 3646f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.orgBytesTrieBuilder::createLinearMatchNode(int32_t i, int32_t byteIndex, int32_t length, 3656f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org Node *nextNode) const { 3666f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org return new BTLinearMatchNode( 3676f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org elements[i].getString(*strings).data()+byteIndex, 3686f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org length, 3696f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org nextNode); 3706f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org} 3716f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org 3726f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.orgUBool 3736f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.orgBytesTrieBuilder::ensureCapacity(int32_t length) { 3746f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org if(bytes==NULL) { 3756f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org return FALSE; // previous memory allocation had failed 3766f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org } 3776f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org if(length>bytesCapacity) { 3786f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org int32_t newCapacity=bytesCapacity; 3796f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org do { 3806f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org newCapacity*=2; 3816f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org } while(newCapacity<=length); 3826f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org char *newBytes=static_cast<char *>(uprv_malloc(newCapacity)); 3836f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org if(newBytes==NULL) { 3846f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org // unable to allocate memory 3856f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org uprv_free(bytes); 3866f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org bytes=NULL; 3876f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org bytesCapacity=0; 3886f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org return FALSE; 3896f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org } 3906f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org uprv_memcpy(newBytes+(newCapacity-bytesLength), 3916f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org bytes+(bytesCapacity-bytesLength), bytesLength); 3926f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org uprv_free(bytes); 3936f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org bytes=newBytes; 3946f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org bytesCapacity=newCapacity; 3956f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org } 3966f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org return TRUE; 3976f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org} 3986f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org 3996f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.orgint32_t 4006f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.orgBytesTrieBuilder::write(int32_t byte) { 4016f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org int32_t newLength=bytesLength+1; 4026f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org if(ensureCapacity(newLength)) { 4036f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org bytesLength=newLength; 4046f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org bytes[bytesCapacity-bytesLength]=(char)byte; 4056f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org } 4066f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org return bytesLength; 4076f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org} 4086f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org 4096f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.orgint32_t 4106f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.orgBytesTrieBuilder::write(const char *b, int32_t length) { 4116f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org int32_t newLength=bytesLength+length; 4126f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org if(ensureCapacity(newLength)) { 4136f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org bytesLength=newLength; 4146f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org uprv_memcpy(bytes+(bytesCapacity-bytesLength), b, length); 4156f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org } 4166f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org return bytesLength; 4176f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org} 4186f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org 4196f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.orgint32_t 4206f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.orgBytesTrieBuilder::writeElementUnits(int32_t i, int32_t byteIndex, int32_t length) { 4216f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org return write(elements[i].getString(*strings).data()+byteIndex, length); 4226f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org} 4236f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org 4246f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.orgint32_t 4256f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.orgBytesTrieBuilder::writeValueAndFinal(int32_t i, UBool isFinal) { 4266f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org if(0<=i && i<=BytesTrie::kMaxOneByteValue) { 4276f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org return write(((BytesTrie::kMinOneByteValueLead+i)<<1)|isFinal); 4286f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org } 4296f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org char intBytes[5]; 4306f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org int32_t length=1; 4316f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org if(i<0 || i>0xffffff) { 4326f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org intBytes[0]=(char)BytesTrie::kFiveByteValueLead; 4336f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org intBytes[1]=(char)((uint32_t)i>>24); 4346f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org intBytes[2]=(char)((uint32_t)i>>16); 4356f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org intBytes[3]=(char)((uint32_t)i>>8); 4366f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org intBytes[4]=(char)i; 4376f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org length=5; 4386f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org // } else if(i<=BytesTrie::kMaxOneByteValue) { 4396f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org // intBytes[0]=(char)(BytesTrie::kMinOneByteValueLead+i); 4406f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org } else { 4416f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org if(i<=BytesTrie::kMaxTwoByteValue) { 4426f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org intBytes[0]=(char)(BytesTrie::kMinTwoByteValueLead+(i>>8)); 4436f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org } else { 4446f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org if(i<=BytesTrie::kMaxThreeByteValue) { 4456f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org intBytes[0]=(char)(BytesTrie::kMinThreeByteValueLead+(i>>16)); 4466f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org } else { 4476f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org intBytes[0]=(char)BytesTrie::kFourByteValueLead; 4486f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org intBytes[1]=(char)(i>>16); 4496f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org length=2; 4506f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org } 4516f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org intBytes[length++]=(char)(i>>8); 4526f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org } 4536f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org intBytes[length++]=(char)i; 4546f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org } 4556f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org intBytes[0]=(char)((intBytes[0]<<1)|isFinal); 4566f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org return write(intBytes, length); 4576f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org} 4586f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org 4596f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.orgint32_t 4606f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.orgBytesTrieBuilder::writeValueAndType(UBool hasValue, int32_t value, int32_t node) { 4616f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org int32_t offset=write(node); 4626f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org if(hasValue) { 4636f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org offset=writeValueAndFinal(value, FALSE); 4646f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org } 4656f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org return offset; 4666f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org} 4676f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org 4686f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.orgint32_t 4696f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.orgBytesTrieBuilder::writeDeltaTo(int32_t jumpTarget) { 4706f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org int32_t i=bytesLength-jumpTarget; 4716f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org U_ASSERT(i>=0); 4726f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org if(i<=BytesTrie::kMaxOneByteDelta) { 4736f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org return write(i); 4746f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org } 4756f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org char intBytes[5]; 4766f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org int32_t length; 4776f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org if(i<=BytesTrie::kMaxTwoByteDelta) { 4786f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org intBytes[0]=(char)(BytesTrie::kMinTwoByteDeltaLead+(i>>8)); 4796f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org length=1; 4806f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org } else { 4816f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org if(i<=BytesTrie::kMaxThreeByteDelta) { 4826f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org intBytes[0]=(char)(BytesTrie::kMinThreeByteDeltaLead+(i>>16)); 4836f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org length=2; 4846f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org } else { 4856f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org if(i<=0xffffff) { 4866f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org intBytes[0]=(char)BytesTrie::kFourByteDeltaLead; 4876f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org length=3; 4886f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org } else { 4896f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org intBytes[0]=(char)BytesTrie::kFiveByteDeltaLead; 4906f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org intBytes[1]=(char)(i>>24); 4916f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org length=4; 4926f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org } 4936f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org intBytes[1]=(char)(i>>16); 4946f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org } 4956f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org intBytes[1]=(char)(i>>8); 4966f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org } 4976f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org intBytes[length++]=(char)i; 4986f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org return write(intBytes, length); 4996f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org} 5006f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org 5016f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.orgU_NAMESPACE_END 502