1b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho/*
2b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho*******************************************************************************
354dcd9b6a06071f647dac967e9e267abb9410720Craig Cornelius*   Copyright (C) 2010-2012, International Business Machines
4b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho*   Corporation and others.  All Rights Reserved.
5b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho*******************************************************************************
6b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho*   file name:  bytestriebuilder.cpp
7b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho*   encoding:   US-ASCII
8b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho*   tab size:   8 (not used)
9b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho*   indentation:4
10b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho*
11b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho*   created on: 2010sep25
12b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho*   created by: Markus W. Scherer
13b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho*/
14b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho
15b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho#include "unicode/utypes.h"
16b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho#include "unicode/bytestrie.h"
17b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho#include "unicode/bytestriebuilder.h"
18b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho#include "unicode/stringpiece.h"
19b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho#include "charstr.h"
20b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho#include "cmemory.h"
21b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho#include "uhash.h"
22b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho#include "uarrsort.h"
23b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho#include "uassert.h"
24103e9ffba2cba345d0078eb8b8db33249f81840aCraig Cornelius#include "ustr_imp.h"
25b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho
26b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2clairehoU_NAMESPACE_BEGIN
27b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho
28b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho/*
29b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho * Note: This builder implementation stores (bytes, value) pairs with full copies
30b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho * of the byte sequences, until the BytesTrie is built.
31b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho * It might(!) take less memory if we collected the data in a temporary, dynamic trie.
32b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho */
33b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho
34b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2clairehoclass BytesTrieElement : public UMemory {
35b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2clairehopublic:
36b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho    // Use compiler's default constructor, initializes nothing.
37b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho
38b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho    void setTo(const StringPiece &s, int32_t val, CharString &strings, UErrorCode &errorCode);
39b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho
40b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho    StringPiece getString(const CharString &strings) const {
41b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho        int32_t offset=stringOffset;
42b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho        int32_t length;
43b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho        if(offset>=0) {
44b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho            length=(uint8_t)strings[offset++];
45b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho        } else {
46b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho            offset=~offset;
47b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho            length=((int32_t)(uint8_t)strings[offset]<<8)|(uint8_t)strings[offset+1];
48b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho            offset+=2;
49b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho        }
50b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho        return StringPiece(strings.data()+offset, length);
51b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho    }
52b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho    int32_t getStringLength(const CharString &strings) const {
53b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho        int32_t offset=stringOffset;
54b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho        if(offset>=0) {
55b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho            return (uint8_t)strings[offset];
56b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho        } else {
57b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho            offset=~offset;
58b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho            return ((int32_t)(uint8_t)strings[offset]<<8)|(uint8_t)strings[offset+1];
59b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho        }
60b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho    }
61b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho
62b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho    char charAt(int32_t index, const CharString &strings) const { return data(strings)[index]; }
63b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho
64b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho    int32_t getValue() const { return value; }
65b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho
66b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho    int32_t compareStringTo(const BytesTrieElement &o, const CharString &strings) const;
67b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho
68b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2clairehoprivate:
69b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho    const char *data(const CharString &strings) const {
70b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho        int32_t offset=stringOffset;
71b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho        if(offset>=0) {
72b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho            ++offset;
73b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho        } else {
74b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho            offset=~offset+2;
75b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho        }
76b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho        return strings.data()+offset;
77b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho    }
78b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho
79b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho    // If the stringOffset is non-negative, then the first strings byte contains
80b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho    // the string length.
81b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho    // If the stringOffset is negative, then the first two strings bytes contain
82b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho    // the string length (big-endian), and the offset needs to be bit-inverted.
83b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho    // (Compared with a stringLength field here, this saves 3 bytes per string for most strings.)
84b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho    int32_t stringOffset;
85b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho    int32_t value;
86b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho};
87b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho
88b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2clairehovoid
89b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2clairehoBytesTrieElement::setTo(const StringPiece &s, int32_t val,
90b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho                        CharString &strings, UErrorCode &errorCode) {
91b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho    if(U_FAILURE(errorCode)) {
92b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho        return;
93b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho    }
94b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho    int32_t length=s.length();
95b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho    if(length>0xffff) {
96b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho        // Too long: We store the length in 1 or 2 bytes.
97b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho        errorCode=U_INDEX_OUTOFBOUNDS_ERROR;
98b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho        return;
99b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho    }
100b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho    int32_t offset=strings.length();
101b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho    if(length>0xff) {
102b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho        offset=~offset;
103b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho        strings.append((char)(length>>8), errorCode);
104b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho    }
105b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho    strings.append((char)length, errorCode);
106b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho    stringOffset=offset;
107b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho    value=val;
108b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho    strings.append(s, errorCode);
109b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho}
110b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho
111b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2clairehoint32_t
112b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2clairehoBytesTrieElement::compareStringTo(const BytesTrieElement &other, const CharString &strings) const {
113b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho    // TODO: add StringPiece::compare(), see ticket #8187
114b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho    StringPiece thisString=getString(strings);
115b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho    StringPiece otherString=other.getString(strings);
116b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho    int32_t lengthDiff=thisString.length()-otherString.length();
117b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho    int32_t commonLength;
118b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho    if(lengthDiff<=0) {
119b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho        commonLength=thisString.length();
120b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho    } else {
121b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho        commonLength=otherString.length();
122b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho    }
123b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho    int32_t diff=uprv_memcmp(thisString.data(), otherString.data(), commonLength);
124b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho    return diff!=0 ? diff : lengthDiff;
125b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho}
126b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho
127b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2clairehoBytesTrieBuilder::BytesTrieBuilder(UErrorCode &errorCode)
128b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho        : strings(NULL), elements(NULL), elementsCapacity(0), elementsLength(0),
129b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho          bytes(NULL), bytesCapacity(0), bytesLength(0) {
130b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho    if(U_FAILURE(errorCode)) {
131b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho        return;
132b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho    }
133b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho    strings=new CharString();
134b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho    if(strings==NULL) {
135b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho        errorCode=U_MEMORY_ALLOCATION_ERROR;
136b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho    }
137b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho}
138b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho
139b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2clairehoBytesTrieBuilder::~BytesTrieBuilder() {
140b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho    delete strings;
141b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho    delete[] elements;
142b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho    uprv_free(bytes);
143b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho}
144b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho
145b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2clairehoBytesTrieBuilder &
146b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2clairehoBytesTrieBuilder::add(const StringPiece &s, int32_t value, UErrorCode &errorCode) {
147b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho    if(U_FAILURE(errorCode)) {
148b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho        return *this;
149b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho    }
150b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho    if(bytesLength>0) {
151b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho        // Cannot add elements after building.
152b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho        errorCode=U_NO_WRITE_PERMISSION;
153b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho        return *this;
154b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho    }
155b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho    if(elementsLength==elementsCapacity) {
156b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho        int32_t newCapacity;
157b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho        if(elementsCapacity==0) {
158b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho            newCapacity=1024;
159b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho        } else {
160b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho            newCapacity=4*elementsCapacity;
161b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho        }
162b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho        BytesTrieElement *newElements=new BytesTrieElement[newCapacity];
163b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho        if(newElements==NULL) {
164b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho            errorCode=U_MEMORY_ALLOCATION_ERROR;
165103e9ffba2cba345d0078eb8b8db33249f81840aCraig Cornelius            return *this; // error instead of dereferencing null
166b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho        }
167b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho        if(elementsLength>0) {
168b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho            uprv_memcpy(newElements, elements, elementsLength*sizeof(BytesTrieElement));
169b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho        }
170b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho        delete[] elements;
171b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho        elements=newElements;
172b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho        elementsCapacity=newCapacity;
173b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho    }
174b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho    elements[elementsLength++].setTo(s, value, *strings, errorCode);
175b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho    return *this;
176b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho}
177b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho
178b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2clairehoU_CDECL_BEGIN
179b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho
180b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2clairehostatic int32_t U_CALLCONV
181b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2clairehocompareElementStrings(const void *context, const void *left, const void *right) {
18254dcd9b6a06071f647dac967e9e267abb9410720Craig Cornelius    const CharString *strings=static_cast<const CharString *>(context);
18354dcd9b6a06071f647dac967e9e267abb9410720Craig Cornelius    const BytesTrieElement *leftElement=static_cast<const BytesTrieElement *>(left);
18454dcd9b6a06071f647dac967e9e267abb9410720Craig Cornelius    const BytesTrieElement *rightElement=static_cast<const BytesTrieElement *>(right);
185b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho    return leftElement->compareStringTo(*rightElement, *strings);
186b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho}
187b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho
188b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2clairehoU_CDECL_END
189b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho
190b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2clairehoBytesTrie *
191b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2clairehoBytesTrieBuilder::build(UStringTrieBuildOption buildOption, UErrorCode &errorCode) {
192b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho    buildBytes(buildOption, errorCode);
193b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho    BytesTrie *newTrie=NULL;
194b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho    if(U_SUCCESS(errorCode)) {
195b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho        newTrie=new BytesTrie(bytes, bytes+(bytesCapacity-bytesLength));
196b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho        if(newTrie==NULL) {
197b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho            errorCode=U_MEMORY_ALLOCATION_ERROR;
198b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho        } else {
199b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho            bytes=NULL;  // The new trie now owns the array.
200b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho            bytesCapacity=0;
201b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho        }
202b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho    }
203b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho    return newTrie;
204b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho}
205b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho
206b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2clairehoStringPiece
207b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2clairehoBytesTrieBuilder::buildStringPiece(UStringTrieBuildOption buildOption, UErrorCode &errorCode) {
208b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho    buildBytes(buildOption, errorCode);
209b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho    StringPiece result;
210b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho    if(U_SUCCESS(errorCode)) {
211b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho        result.set(bytes+(bytesCapacity-bytesLength), bytesLength);
212b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho    }
213b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho    return result;
214b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho}
215b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho
216b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2clairehovoid
217b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2clairehoBytesTrieBuilder::buildBytes(UStringTrieBuildOption buildOption, UErrorCode &errorCode) {
218b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho    if(U_FAILURE(errorCode)) {
219b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho        return;
220b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho    }
221b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho    if(bytes!=NULL && bytesLength>0) {
222b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho        // Already built.
223b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho        return;
224b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho    }
225b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho    if(bytesLength==0) {
226b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho        if(elementsLength==0) {
227b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho            errorCode=U_INDEX_OUTOFBOUNDS_ERROR;
228b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho            return;
229b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho        }
230b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho        uprv_sortArray(elements, elementsLength, (int32_t)sizeof(BytesTrieElement),
231b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho                      compareElementStrings, strings,
232b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho                      FALSE,  // need not be a stable sort
233b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho                      &errorCode);
234b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho        if(U_FAILURE(errorCode)) {
235b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho            return;
236b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho        }
237b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho        // Duplicate strings are not allowed.
238b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho        StringPiece prev=elements[0].getString(*strings);
239b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho        for(int32_t i=1; i<elementsLength; ++i) {
240b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho            StringPiece current=elements[i].getString(*strings);
241b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho            if(prev==current) {
242b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho                errorCode=U_ILLEGAL_ARGUMENT_ERROR;
243b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho                return;
244b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho            }
245b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho            prev=current;
246b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho        }
247b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho    }
248b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho    // Create and byte-serialize the trie for the elements.
249b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho    bytesLength=0;
250b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho    int32_t capacity=strings->length();
251b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho    if(capacity<1024) {
252b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho        capacity=1024;
253b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho    }
254b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho    if(bytesCapacity<capacity) {
255b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho        uprv_free(bytes);
25654dcd9b6a06071f647dac967e9e267abb9410720Craig Cornelius        bytes=static_cast<char *>(uprv_malloc(capacity));
257b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho        if(bytes==NULL) {
258b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho            errorCode=U_MEMORY_ALLOCATION_ERROR;
259b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho            bytesCapacity=0;
260b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho            return;
261b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho        }
262b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho        bytesCapacity=capacity;
263b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho    }
264b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho    StringTrieBuilder::build(buildOption, elementsLength, errorCode);
265b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho    if(bytes==NULL) {
266b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho        errorCode=U_MEMORY_ALLOCATION_ERROR;
267b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho    }
268b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho}
269b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho
270b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2clairehoBytesTrieBuilder &
271b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2clairehoBytesTrieBuilder::clear() {
272b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho    strings->clear();
273b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho    elementsLength=0;
274b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho    bytesLength=0;
275b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho    return *this;
276b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho}
277b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho
278b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2clairehoint32_t
279b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2clairehoBytesTrieBuilder::getElementStringLength(int32_t i) const {
280b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho    return elements[i].getStringLength(*strings);
281b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho}
282b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho
283b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2clairehoUChar
284b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2clairehoBytesTrieBuilder::getElementUnit(int32_t i, int32_t byteIndex) const {
285b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho    return (uint8_t)elements[i].charAt(byteIndex, *strings);
286b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho}
287b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho
288b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2clairehoint32_t
289b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2clairehoBytesTrieBuilder::getElementValue(int32_t i) const {
290b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho    return elements[i].getValue();
291b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho}
292b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho
293b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2clairehoint32_t
294b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2clairehoBytesTrieBuilder::getLimitOfLinearMatch(int32_t first, int32_t last, int32_t byteIndex) const {
295b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho    const BytesTrieElement &firstElement=elements[first];
296b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho    const BytesTrieElement &lastElement=elements[last];
297b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho    int32_t minStringLength=firstElement.getStringLength(*strings);
298b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho    while(++byteIndex<minStringLength &&
299b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho            firstElement.charAt(byteIndex, *strings)==
300b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho            lastElement.charAt(byteIndex, *strings)) {}
301b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho    return byteIndex;
302b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho}
303b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho
304b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2clairehoint32_t
305b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2clairehoBytesTrieBuilder::countElementUnits(int32_t start, int32_t limit, int32_t byteIndex) const {
306b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho    int32_t length=0;  // Number of different bytes at byteIndex.
307b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho    int32_t i=start;
308b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho    do {
309b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho        char byte=elements[i++].charAt(byteIndex, *strings);
310b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho        while(i<limit && byte==elements[i].charAt(byteIndex, *strings)) {
311b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho            ++i;
312b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho        }
313b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho        ++length;
314b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho    } while(i<limit);
315b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho    return length;
316b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho}
317b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho
318b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2clairehoint32_t
319b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2clairehoBytesTrieBuilder::skipElementsBySomeUnits(int32_t i, int32_t byteIndex, int32_t count) const {
320b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho    do {
321b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho        char byte=elements[i++].charAt(byteIndex, *strings);
322b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho        while(byte==elements[i].charAt(byteIndex, *strings)) {
323b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho            ++i;
324b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho        }
325b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho    } while(--count>0);
326b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho    return i;
327b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho}
328b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho
329b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2clairehoint32_t
330b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2clairehoBytesTrieBuilder::indexOfElementWithNextUnit(int32_t i, int32_t byteIndex, UChar byte) const {
331b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho    char b=(char)byte;
332b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho    while(b==elements[i].charAt(byteIndex, *strings)) {
333b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho        ++i;
334b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho    }
335b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho    return i;
336b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho}
337b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho
338b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2clairehoBytesTrieBuilder::BTLinearMatchNode::BTLinearMatchNode(const char *bytes, int32_t len, Node *nextNode)
339b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho        : LinearMatchNode(len, nextNode), s(bytes) {
340103e9ffba2cba345d0078eb8b8db33249f81840aCraig Cornelius    hash=hash*37+ustr_hashCharsN(bytes, len);
341b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho}
342b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho
343b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2clairehoUBool
344b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2clairehoBytesTrieBuilder::BTLinearMatchNode::operator==(const Node &other) const {
345b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho    if(this==&other) {
346b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho        return TRUE;
347b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho    }
348b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho    if(!LinearMatchNode::operator==(other)) {
349b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho        return FALSE;
350b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho    }
351b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho    const BTLinearMatchNode &o=(const BTLinearMatchNode &)other;
352b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho    return 0==uprv_memcmp(s, o.s, length);
353b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho}
354b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho
355b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2clairehovoid
356b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2clairehoBytesTrieBuilder::BTLinearMatchNode::write(StringTrieBuilder &builder) {
357b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho    BytesTrieBuilder &b=(BytesTrieBuilder &)builder;
358b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho    next->write(builder);
359b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho    b.write(s, length);
360b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho    offset=b.write(b.getMinLinearMatch()+length-1);
361b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho}
362b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho
363b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2clairehoStringTrieBuilder::Node *
364b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2clairehoBytesTrieBuilder::createLinearMatchNode(int32_t i, int32_t byteIndex, int32_t length,
365b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho                                        Node *nextNode) const {
366b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho    return new BTLinearMatchNode(
367b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho            elements[i].getString(*strings).data()+byteIndex,
368b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho            length,
369b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho            nextNode);
370b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho}
371b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho
372b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2clairehoUBool
373b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2clairehoBytesTrieBuilder::ensureCapacity(int32_t length) {
374b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho    if(bytes==NULL) {
375b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho        return FALSE;  // previous memory allocation had failed
376b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho    }
377b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho    if(length>bytesCapacity) {
378b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho        int32_t newCapacity=bytesCapacity;
379b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho        do {
380b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho            newCapacity*=2;
381b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho        } while(newCapacity<=length);
38254dcd9b6a06071f647dac967e9e267abb9410720Craig Cornelius        char *newBytes=static_cast<char *>(uprv_malloc(newCapacity));
383b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho        if(newBytes==NULL) {
384b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho            // unable to allocate memory
385b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho            uprv_free(bytes);
386b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho            bytes=NULL;
387b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho            bytesCapacity=0;
388b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho            return FALSE;
389b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho        }
390b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho        uprv_memcpy(newBytes+(newCapacity-bytesLength),
391b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho                    bytes+(bytesCapacity-bytesLength), bytesLength);
392b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho        uprv_free(bytes);
393b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho        bytes=newBytes;
394b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho        bytesCapacity=newCapacity;
395b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho    }
396b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho    return TRUE;
397b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho}
398b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho
399b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2clairehoint32_t
400b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2clairehoBytesTrieBuilder::write(int32_t byte) {
401b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho    int32_t newLength=bytesLength+1;
402b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho    if(ensureCapacity(newLength)) {
403b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho        bytesLength=newLength;
404b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho        bytes[bytesCapacity-bytesLength]=(char)byte;
405b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho    }
406b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho    return bytesLength;
407b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho}
408b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho
409b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2clairehoint32_t
410b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2clairehoBytesTrieBuilder::write(const char *b, int32_t length) {
411b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho    int32_t newLength=bytesLength+length;
412b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho    if(ensureCapacity(newLength)) {
413b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho        bytesLength=newLength;
414b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho        uprv_memcpy(bytes+(bytesCapacity-bytesLength), b, length);
415b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho    }
416b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho    return bytesLength;
417b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho}
418b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho
419b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2clairehoint32_t
420b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2clairehoBytesTrieBuilder::writeElementUnits(int32_t i, int32_t byteIndex, int32_t length) {
421b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho    return write(elements[i].getString(*strings).data()+byteIndex, length);
422b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho}
423b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho
424b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2clairehoint32_t
425b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2clairehoBytesTrieBuilder::writeValueAndFinal(int32_t i, UBool isFinal) {
426b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho    if(0<=i && i<=BytesTrie::kMaxOneByteValue) {
427b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho        return write(((BytesTrie::kMinOneByteValueLead+i)<<1)|isFinal);
428b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho    }
429b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho    char intBytes[5];
430b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho    int32_t length=1;
431b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho    if(i<0 || i>0xffffff) {
432b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho        intBytes[0]=(char)BytesTrie::kFiveByteValueLead;
43354dcd9b6a06071f647dac967e9e267abb9410720Craig Cornelius        intBytes[1]=(char)((uint32_t)i>>24);
43454dcd9b6a06071f647dac967e9e267abb9410720Craig Cornelius        intBytes[2]=(char)((uint32_t)i>>16);
43554dcd9b6a06071f647dac967e9e267abb9410720Craig Cornelius        intBytes[3]=(char)((uint32_t)i>>8);
436b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho        intBytes[4]=(char)i;
437b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho        length=5;
438b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho    // } else if(i<=BytesTrie::kMaxOneByteValue) {
439b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho    //     intBytes[0]=(char)(BytesTrie::kMinOneByteValueLead+i);
440b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho    } else {
441b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho        if(i<=BytesTrie::kMaxTwoByteValue) {
442b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho            intBytes[0]=(char)(BytesTrie::kMinTwoByteValueLead+(i>>8));
443b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho        } else {
444b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho            if(i<=BytesTrie::kMaxThreeByteValue) {
445b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho                intBytes[0]=(char)(BytesTrie::kMinThreeByteValueLead+(i>>16));
446b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho            } else {
447b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho                intBytes[0]=(char)BytesTrie::kFourByteValueLead;
448b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho                intBytes[1]=(char)(i>>16);
449b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho                length=2;
450b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho            }
451b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho            intBytes[length++]=(char)(i>>8);
452b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho        }
453b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho        intBytes[length++]=(char)i;
454b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho    }
455b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho    intBytes[0]=(char)((intBytes[0]<<1)|isFinal);
456b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho    return write(intBytes, length);
457b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho}
458b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho
459b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2clairehoint32_t
460b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2clairehoBytesTrieBuilder::writeValueAndType(UBool hasValue, int32_t value, int32_t node) {
461b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho    int32_t offset=write(node);
462b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho    if(hasValue) {
463b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho        offset=writeValueAndFinal(value, FALSE);
464b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho    }
465b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho    return offset;
466b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho}
467b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho
468b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2clairehoint32_t
469b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2clairehoBytesTrieBuilder::writeDeltaTo(int32_t jumpTarget) {
470b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho    int32_t i=bytesLength-jumpTarget;
471b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho    U_ASSERT(i>=0);
472b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho    if(i<=BytesTrie::kMaxOneByteDelta) {
473b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho        return write(i);
474b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho    }
475b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho    char intBytes[5];
476b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho    int32_t length;
477b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho    if(i<=BytesTrie::kMaxTwoByteDelta) {
478b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho        intBytes[0]=(char)(BytesTrie::kMinTwoByteDeltaLead+(i>>8));
479b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho        length=1;
480b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho    } else {
481b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho        if(i<=BytesTrie::kMaxThreeByteDelta) {
482b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho            intBytes[0]=(char)(BytesTrie::kMinThreeByteDeltaLead+(i>>16));
483b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho            length=2;
484b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho        } else {
485b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho            if(i<=0xffffff) {
486b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho                intBytes[0]=(char)BytesTrie::kFourByteDeltaLead;
487b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho                length=3;
488b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho            } else {
489b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho                intBytes[0]=(char)BytesTrie::kFiveByteDeltaLead;
490b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho                intBytes[1]=(char)(i>>24);
491b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho                length=4;
492b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho            }
493b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho            intBytes[1]=(char)(i>>16);
494b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho        }
495b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho        intBytes[1]=(char)(i>>8);
496b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho    }
497b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho    intBytes[length++]=(char)i;
498b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho    return write(intBytes, length);
499b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho}
500b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho
501b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2clairehoU_NAMESPACE_END
502