1b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho/*
2b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho*******************************************************************************
3b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho*   Copyright (C) 2010-2011, International Business Machines
4b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho*   Corporation and others.  All Rights Reserved.
5b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho*******************************************************************************
6b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho*   file name:  ucharstrie.h
7b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho*   encoding:   US-ASCII
8b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho*   tab size:   8 (not used)
9b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho*   indentation:4
10b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho*
11b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho*   created on: 2010nov14
12b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho*   created by: Markus W. Scherer
13b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho*/
14b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho
15b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho#include "unicode/utypes.h"
16b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho#include "unicode/appendable.h"
17b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho#include "unicode/ucharstrie.h"
18b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho#include "unicode/uobject.h"
1983a171d1a62abf406f7f44ae671823d5ec20db7dCraig Cornelius#include "unicode/utf16.h"
20b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho#include "cmemory.h"
21b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho#include "uassert.h"
22b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho
23b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2clairehoU_NAMESPACE_BEGIN
24b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho
25b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2clairehoUCharsTrie::~UCharsTrie() {
26b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho    uprv_free(ownedArray_);
27b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho}
28b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho
29b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2clairehoUStringTrieResult
30b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2clairehoUCharsTrie::current() const {
31b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho    const UChar *pos=pos_;
32b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho    if(pos==NULL) {
33b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho        return USTRINGTRIE_NO_MATCH;
34b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho    } else {
35b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho        int32_t node;
36b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho        return (remainingMatchLength_<0 && (node=*pos)>=kMinValueLead) ?
37b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho                valueResult(node) : USTRINGTRIE_NO_VALUE;
38b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho    }
39b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho}
40b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho
41b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2clairehoUStringTrieResult
4283a171d1a62abf406f7f44ae671823d5ec20db7dCraig CorneliusUCharsTrie::firstForCodePoint(UChar32 cp) {
4383a171d1a62abf406f7f44ae671823d5ec20db7dCraig Cornelius    return cp<=0xffff ?
4483a171d1a62abf406f7f44ae671823d5ec20db7dCraig Cornelius        first(cp) :
4583a171d1a62abf406f7f44ae671823d5ec20db7dCraig Cornelius        (USTRINGTRIE_HAS_NEXT(first(U16_LEAD(cp))) ?
4683a171d1a62abf406f7f44ae671823d5ec20db7dCraig Cornelius            next(U16_TRAIL(cp)) :
4783a171d1a62abf406f7f44ae671823d5ec20db7dCraig Cornelius            USTRINGTRIE_NO_MATCH);
4883a171d1a62abf406f7f44ae671823d5ec20db7dCraig Cornelius}
4983a171d1a62abf406f7f44ae671823d5ec20db7dCraig Cornelius
5083a171d1a62abf406f7f44ae671823d5ec20db7dCraig CorneliusUStringTrieResult
5183a171d1a62abf406f7f44ae671823d5ec20db7dCraig CorneliusUCharsTrie::nextForCodePoint(UChar32 cp) {
5283a171d1a62abf406f7f44ae671823d5ec20db7dCraig Cornelius    return cp<=0xffff ?
5383a171d1a62abf406f7f44ae671823d5ec20db7dCraig Cornelius        next(cp) :
5483a171d1a62abf406f7f44ae671823d5ec20db7dCraig Cornelius        (USTRINGTRIE_HAS_NEXT(next(U16_LEAD(cp))) ?
5583a171d1a62abf406f7f44ae671823d5ec20db7dCraig Cornelius            next(U16_TRAIL(cp)) :
5683a171d1a62abf406f7f44ae671823d5ec20db7dCraig Cornelius            USTRINGTRIE_NO_MATCH);
5783a171d1a62abf406f7f44ae671823d5ec20db7dCraig Cornelius}
5883a171d1a62abf406f7f44ae671823d5ec20db7dCraig Cornelius
5983a171d1a62abf406f7f44ae671823d5ec20db7dCraig CorneliusUStringTrieResult
60b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2clairehoUCharsTrie::branchNext(const UChar *pos, int32_t length, int32_t uchar) {
61b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho    // Branch according to the current unit.
62b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho    if(length==0) {
63b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho        length=*pos++;
64b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho    }
65b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho    ++length;
66b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho    // The length of the branch is the number of units to select from.
67b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho    // The data structure encodes a binary search.
68b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho    while(length>kMaxBranchLinearSubNodeLength) {
69b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho        if(uchar<*pos++) {
70b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho            length>>=1;
71b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho            pos=jumpByDelta(pos);
72b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho        } else {
73b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho            length=length-(length>>1);
74b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho            pos=skipDelta(pos);
75b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho        }
76b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho    }
77b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho    // Drop down to linear search for the last few units.
78b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho    // length>=2 because the loop body above sees length>kMaxBranchLinearSubNodeLength>=3
79b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho    // and divides length by 2.
80b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho    do {
81b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho        if(uchar==*pos++) {
82b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho            UStringTrieResult result;
83b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho            int32_t node=*pos;
84b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho            if(node&kValueIsFinal) {
85b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho                // Leave the final value for getValue() to read.
86b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho                result=USTRINGTRIE_FINAL_VALUE;
87b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho            } else {
88b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho                // Use the non-final value as the jump delta.
89b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho                ++pos;
90b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho                // int32_t delta=readValue(pos, node);
91b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho                int32_t delta;
92b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho                if(node<kMinTwoUnitValueLead) {
93b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho                    delta=node;
94b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho                } else if(node<kThreeUnitValueLead) {
95b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho                    delta=((node-kMinTwoUnitValueLead)<<16)|*pos++;
96b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho                } else {
97b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho                    delta=(pos[0]<<16)|pos[1];
98b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho                    pos+=2;
99b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho                }
100b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho                // end readValue()
101b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho                pos+=delta;
102b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho                node=*pos;
103b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho                result= node>=kMinValueLead ? valueResult(node) : USTRINGTRIE_NO_VALUE;
104b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho            }
105b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho            pos_=pos;
106b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho            return result;
107b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho        }
108b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho        --length;
109b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho        pos=skipValue(pos);
110b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho    } while(length>1);
111b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho    if(uchar==*pos++) {
112b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho        pos_=pos;
113b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho        int32_t node=*pos;
114b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho        return node>=kMinValueLead ? valueResult(node) : USTRINGTRIE_NO_VALUE;
115b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho    } else {
116b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho        stop();
117b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho        return USTRINGTRIE_NO_MATCH;
118b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho    }
119b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho}
120b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho
121b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2clairehoUStringTrieResult
122b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2clairehoUCharsTrie::nextImpl(const UChar *pos, int32_t uchar) {
123b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho    int32_t node=*pos++;
124b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho    for(;;) {
125b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho        if(node<kMinLinearMatch) {
126b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho            return branchNext(pos, node, uchar);
127b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho        } else if(node<kMinValueLead) {
128b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho            // Match the first of length+1 units.
129b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho            int32_t length=node-kMinLinearMatch;  // Actual match length minus 1.
130b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho            if(uchar==*pos++) {
131b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho                remainingMatchLength_=--length;
132b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho                pos_=pos;
133b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho                return (length<0 && (node=*pos)>=kMinValueLead) ?
134b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho                        valueResult(node) : USTRINGTRIE_NO_VALUE;
135b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho            } else {
136b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho                // No match.
137b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho                break;
138b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho            }
139b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho        } else if(node&kValueIsFinal) {
140b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho            // No further matching units.
141b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho            break;
142b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho        } else {
143b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho            // Skip intermediate value.
144b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho            pos=skipNodeValue(pos, node);
145b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho            node&=kNodeTypeMask;
146b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho        }
147b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho    }
148b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho    stop();
149b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho    return USTRINGTRIE_NO_MATCH;
150b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho}
151b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho
152b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2clairehoUStringTrieResult
153b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2clairehoUCharsTrie::next(int32_t uchar) {
154b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho    const UChar *pos=pos_;
155b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho    if(pos==NULL) {
156b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho        return USTRINGTRIE_NO_MATCH;
157b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho    }
158b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho    int32_t length=remainingMatchLength_;  // Actual remaining match length minus 1.
159b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho    if(length>=0) {
160b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho        // Remaining part of a linear-match node.
161b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho        if(uchar==*pos++) {
162b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho            remainingMatchLength_=--length;
163b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho            pos_=pos;
164b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho            int32_t node;
165b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho            return (length<0 && (node=*pos)>=kMinValueLead) ?
166b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho                    valueResult(node) : USTRINGTRIE_NO_VALUE;
167b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho        } else {
168b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho            stop();
169b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho            return USTRINGTRIE_NO_MATCH;
170b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho        }
171b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho    }
172b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho    return nextImpl(pos, uchar);
173b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho}
174b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho
175b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2clairehoUStringTrieResult
176b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2clairehoUCharsTrie::next(const UChar *s, int32_t sLength) {
177b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho    if(sLength<0 ? *s==0 : sLength==0) {
178b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho        // Empty input.
179b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho        return current();
180b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho    }
181b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho    const UChar *pos=pos_;
182b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho    if(pos==NULL) {
183b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho        return USTRINGTRIE_NO_MATCH;
184b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho    }
185b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho    int32_t length=remainingMatchLength_;  // Actual remaining match length minus 1.
186b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho    for(;;) {
187b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho        // Fetch the next input unit, if there is one.
188b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho        // Continue a linear-match node without rechecking sLength<0.
189b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho        int32_t uchar;
190b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho        if(sLength<0) {
191b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho            for(;;) {
192b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho                if((uchar=*s++)==0) {
193b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho                    remainingMatchLength_=length;
194b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho                    pos_=pos;
195b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho                    int32_t node;
196b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho                    return (length<0 && (node=*pos)>=kMinValueLead) ?
197b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho                            valueResult(node) : USTRINGTRIE_NO_VALUE;
198b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho                }
199b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho                if(length<0) {
200b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho                    remainingMatchLength_=length;
201b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho                    break;
202b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho                }
203b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho                if(uchar!=*pos) {
204b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho                    stop();
205b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho                    return USTRINGTRIE_NO_MATCH;
206b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho                }
207b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho                ++pos;
208b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho                --length;
209b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho            }
210b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho        } else {
211b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho            for(;;) {
212b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho                if(sLength==0) {
213b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho                    remainingMatchLength_=length;
214b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho                    pos_=pos;
215b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho                    int32_t node;
216b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho                    return (length<0 && (node=*pos)>=kMinValueLead) ?
217b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho                            valueResult(node) : USTRINGTRIE_NO_VALUE;
218b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho                }
219b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho                uchar=*s++;
220b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho                --sLength;
221b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho                if(length<0) {
222b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho                    remainingMatchLength_=length;
223b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho                    break;
224b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho                }
225b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho                if(uchar!=*pos) {
226b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho                    stop();
227b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho                    return USTRINGTRIE_NO_MATCH;
228b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho                }
229b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho                ++pos;
230b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho                --length;
231b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho            }
232b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho        }
233b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho        int32_t node=*pos++;
234b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho        for(;;) {
235b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho            if(node<kMinLinearMatch) {
236b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho                UStringTrieResult result=branchNext(pos, node, uchar);
237b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho                if(result==USTRINGTRIE_NO_MATCH) {
238b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho                    return USTRINGTRIE_NO_MATCH;
239b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho                }
240b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho                // Fetch the next input unit, if there is one.
241b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho                if(sLength<0) {
242b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho                    if((uchar=*s++)==0) {
243b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho                        return result;
244b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho                    }
245b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho                } else {
246b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho                    if(sLength==0) {
247b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho                        return result;
248b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho                    }
249b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho                    uchar=*s++;
250b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho                    --sLength;
251b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho                }
252b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho                if(result==USTRINGTRIE_FINAL_VALUE) {
253b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho                    // No further matching units.
254b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho                    stop();
255b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho                    return USTRINGTRIE_NO_MATCH;
256b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho                }
257b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho                pos=pos_;  // branchNext() advanced pos and wrote it to pos_ .
258b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho                node=*pos++;
259b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho            } else if(node<kMinValueLead) {
260b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho                // Match length+1 units.
261b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho                length=node-kMinLinearMatch;  // Actual match length minus 1.
262b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho                if(uchar!=*pos) {
263b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho                    stop();
264b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho                    return USTRINGTRIE_NO_MATCH;
265b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho                }
266b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho                ++pos;
267b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho                --length;
268b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho                break;
269b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho            } else if(node&kValueIsFinal) {
270b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho                // No further matching units.
271b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho                stop();
272b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho                return USTRINGTRIE_NO_MATCH;
273b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho            } else {
274b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho                // Skip intermediate value.
275b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho                pos=skipNodeValue(pos, node);
276b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho                node&=kNodeTypeMask;
277b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho            }
278b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho        }
279b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho    }
280b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho}
281b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho
282b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2clairehoconst UChar *
283b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2clairehoUCharsTrie::findUniqueValueFromBranch(const UChar *pos, int32_t length,
284b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho                                      UBool haveUniqueValue, int32_t &uniqueValue) {
285b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho    while(length>kMaxBranchLinearSubNodeLength) {
286b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho        ++pos;  // ignore the comparison unit
287b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho        if(NULL==findUniqueValueFromBranch(jumpByDelta(pos), length>>1, haveUniqueValue, uniqueValue)) {
288b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho            return NULL;
289b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho        }
290b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho        length=length-(length>>1);
291b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho        pos=skipDelta(pos);
292b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho    }
293b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho    do {
294b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho        ++pos;  // ignore a comparison unit
295b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho        // handle its value
296b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho        int32_t node=*pos++;
297b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho        UBool isFinal=(UBool)(node>>15);
298b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho        node&=0x7fff;
299b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho        int32_t value=readValue(pos, node);
300b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho        pos=skipValue(pos, node);
301b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho        if(isFinal) {
302b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho            if(haveUniqueValue) {
303b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho                if(value!=uniqueValue) {
304b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho                    return NULL;
305b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho                }
306b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho            } else {
307b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho                uniqueValue=value;
308b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho                haveUniqueValue=TRUE;
309b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho            }
310b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho        } else {
311b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho            if(!findUniqueValue(pos+value, haveUniqueValue, uniqueValue)) {
312b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho                return NULL;
313b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho            }
314b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho            haveUniqueValue=TRUE;
315b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho        }
316b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho    } while(--length>1);
317b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho    return pos+1;  // ignore the last comparison unit
318b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho}
319b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho
320b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2clairehoUBool
321b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2clairehoUCharsTrie::findUniqueValue(const UChar *pos, UBool haveUniqueValue, int32_t &uniqueValue) {
322b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho    int32_t node=*pos++;
323b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho    for(;;) {
324b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho        if(node<kMinLinearMatch) {
325b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho            if(node==0) {
326b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho                node=*pos++;
327b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho            }
328b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho            pos=findUniqueValueFromBranch(pos, node+1, haveUniqueValue, uniqueValue);
329b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho            if(pos==NULL) {
330b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho                return FALSE;
331b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho            }
332b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho            haveUniqueValue=TRUE;
333b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho            node=*pos++;
334b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho        } else if(node<kMinValueLead) {
335b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho            // linear-match node
336b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho            pos+=node-kMinLinearMatch+1;  // Ignore the match units.
337b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho            node=*pos++;
338b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho        } else {
339b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho            UBool isFinal=(UBool)(node>>15);
340b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho            int32_t value;
341b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho            if(isFinal) {
342b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho                value=readValue(pos, node&0x7fff);
343b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho            } else {
344b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho                value=readNodeValue(pos, node);
345b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho            }
346b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho            if(haveUniqueValue) {
347b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho                if(value!=uniqueValue) {
348b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho                    return FALSE;
349b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho                }
350b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho            } else {
351b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho                uniqueValue=value;
352b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho                haveUniqueValue=TRUE;
353b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho            }
354b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho            if(isFinal) {
355b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho                return TRUE;
356b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho            }
357b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho            pos=skipNodeValue(pos, node);
358b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho            node&=kNodeTypeMask;
359b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho        }
360b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho    }
361b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho}
362b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho
363b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2clairehoint32_t
364b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2clairehoUCharsTrie::getNextUChars(Appendable &out) const {
365b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho    const UChar *pos=pos_;
366b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho    if(pos==NULL) {
367b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho        return 0;
368b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho    }
369b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho    if(remainingMatchLength_>=0) {
370b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho        out.appendCodeUnit(*pos);  // Next unit of a pending linear-match node.
371b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho        return 1;
372b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho    }
373b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho    int32_t node=*pos++;
374b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho    if(node>=kMinValueLead) {
375b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho        if(node&kValueIsFinal) {
376b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho            return 0;
377b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho        } else {
378b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho            pos=skipNodeValue(pos, node);
379b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho            node&=kNodeTypeMask;
380b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho        }
381b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho    }
382b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho    if(node<kMinLinearMatch) {
383b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho        if(node==0) {
384b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho            node=*pos++;
385b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho        }
386b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho        out.reserveAppendCapacity(++node);
387b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho        getNextBranchUChars(pos, node, out);
388b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho        return node;
389b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho    } else {
390b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho        // First unit of the linear-match node.
391b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho        out.appendCodeUnit(*pos);
392b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho        return 1;
393b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho    }
394b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho}
395b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho
396b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2clairehovoid
397b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2clairehoUCharsTrie::getNextBranchUChars(const UChar *pos, int32_t length, Appendable &out) {
398b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho    while(length>kMaxBranchLinearSubNodeLength) {
399b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho        ++pos;  // ignore the comparison unit
400b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho        getNextBranchUChars(jumpByDelta(pos), length>>1, out);
401b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho        length=length-(length>>1);
402b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho        pos=skipDelta(pos);
403b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho    }
404b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho    do {
405b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho        out.appendCodeUnit(*pos++);
406b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho        pos=skipValue(pos);
407b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho    } while(--length>1);
408b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho    out.appendCodeUnit(*pos);
409b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho}
410b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho
411b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2clairehoU_NAMESPACE_END
412