164339d36f8bd4db5025fe2988eda22b491a9219cFredrik Roubert// Copyright (C) 2016 and later: Unicode, Inc. and others.
264339d36f8bd4db5025fe2988eda22b491a9219cFredrik Roubert// License & terms of use: http://www.unicode.org/copyright.html
3b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho/*
4b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho*******************************************************************************
5b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho*   Copyright (C) 2010-2011, International Business Machines
6b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho*   Corporation and others.  All Rights Reserved.
7b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho*******************************************************************************
8b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho*   file name:  ucharstrie.h
9b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho*   encoding:   US-ASCII
10b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho*   tab size:   8 (not used)
11b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho*   indentation:4
12b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho*
13b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho*   created on: 2010nov14
14b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho*   created by: Markus W. Scherer
15b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho*/
16b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho
17b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho#include "unicode/utypes.h"
18b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho#include "unicode/appendable.h"
19b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho#include "unicode/ucharstrie.h"
20b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho#include "unicode/uobject.h"
2183a171d1a62abf406f7f44ae671823d5ec20db7dCraig Cornelius#include "unicode/utf16.h"
22b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho#include "cmemory.h"
23b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho#include "uassert.h"
24b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho
25b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2clairehoU_NAMESPACE_BEGIN
26b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho
27b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2clairehoUCharsTrie::~UCharsTrie() {
28b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho    uprv_free(ownedArray_);
29b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho}
30b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho
31b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2clairehoUStringTrieResult
32b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2clairehoUCharsTrie::current() const {
33b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho    const UChar *pos=pos_;
34b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho    if(pos==NULL) {
35b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho        return USTRINGTRIE_NO_MATCH;
36b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho    } else {
37b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho        int32_t node;
38b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho        return (remainingMatchLength_<0 && (node=*pos)>=kMinValueLead) ?
39b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho                valueResult(node) : USTRINGTRIE_NO_VALUE;
40b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho    }
41b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho}
42b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho
43b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2clairehoUStringTrieResult
4483a171d1a62abf406f7f44ae671823d5ec20db7dCraig CorneliusUCharsTrie::firstForCodePoint(UChar32 cp) {
4583a171d1a62abf406f7f44ae671823d5ec20db7dCraig Cornelius    return cp<=0xffff ?
4683a171d1a62abf406f7f44ae671823d5ec20db7dCraig Cornelius        first(cp) :
4783a171d1a62abf406f7f44ae671823d5ec20db7dCraig Cornelius        (USTRINGTRIE_HAS_NEXT(first(U16_LEAD(cp))) ?
4883a171d1a62abf406f7f44ae671823d5ec20db7dCraig Cornelius            next(U16_TRAIL(cp)) :
4983a171d1a62abf406f7f44ae671823d5ec20db7dCraig Cornelius            USTRINGTRIE_NO_MATCH);
5083a171d1a62abf406f7f44ae671823d5ec20db7dCraig Cornelius}
5183a171d1a62abf406f7f44ae671823d5ec20db7dCraig Cornelius
5283a171d1a62abf406f7f44ae671823d5ec20db7dCraig CorneliusUStringTrieResult
5383a171d1a62abf406f7f44ae671823d5ec20db7dCraig CorneliusUCharsTrie::nextForCodePoint(UChar32 cp) {
5483a171d1a62abf406f7f44ae671823d5ec20db7dCraig Cornelius    return cp<=0xffff ?
5583a171d1a62abf406f7f44ae671823d5ec20db7dCraig Cornelius        next(cp) :
5683a171d1a62abf406f7f44ae671823d5ec20db7dCraig Cornelius        (USTRINGTRIE_HAS_NEXT(next(U16_LEAD(cp))) ?
5783a171d1a62abf406f7f44ae671823d5ec20db7dCraig Cornelius            next(U16_TRAIL(cp)) :
5883a171d1a62abf406f7f44ae671823d5ec20db7dCraig Cornelius            USTRINGTRIE_NO_MATCH);
5983a171d1a62abf406f7f44ae671823d5ec20db7dCraig Cornelius}
6083a171d1a62abf406f7f44ae671823d5ec20db7dCraig Cornelius
6183a171d1a62abf406f7f44ae671823d5ec20db7dCraig CorneliusUStringTrieResult
62b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2clairehoUCharsTrie::branchNext(const UChar *pos, int32_t length, int32_t uchar) {
63b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho    // Branch according to the current unit.
64b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho    if(length==0) {
65b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho        length=*pos++;
66b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho    }
67b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho    ++length;
68b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho    // The length of the branch is the number of units to select from.
69b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho    // The data structure encodes a binary search.
70b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho    while(length>kMaxBranchLinearSubNodeLength) {
71b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho        if(uchar<*pos++) {
72b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho            length>>=1;
73b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho            pos=jumpByDelta(pos);
74b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho        } else {
75b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho            length=length-(length>>1);
76b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho            pos=skipDelta(pos);
77b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho        }
78b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho    }
79b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho    // Drop down to linear search for the last few units.
80b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho    // length>=2 because the loop body above sees length>kMaxBranchLinearSubNodeLength>=3
81b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho    // and divides length by 2.
82b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho    do {
83b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho        if(uchar==*pos++) {
84b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho            UStringTrieResult result;
85b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho            int32_t node=*pos;
86b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho            if(node&kValueIsFinal) {
87b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho                // Leave the final value for getValue() to read.
88b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho                result=USTRINGTRIE_FINAL_VALUE;
89b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho            } else {
90b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho                // Use the non-final value as the jump delta.
91b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho                ++pos;
92b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho                // int32_t delta=readValue(pos, node);
93b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho                int32_t delta;
94b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho                if(node<kMinTwoUnitValueLead) {
95b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho                    delta=node;
96b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho                } else if(node<kThreeUnitValueLead) {
97b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho                    delta=((node-kMinTwoUnitValueLead)<<16)|*pos++;
98b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho                } else {
99b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho                    delta=(pos[0]<<16)|pos[1];
100b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho                    pos+=2;
101b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho                }
102b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho                // end readValue()
103b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho                pos+=delta;
104b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho                node=*pos;
105b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho                result= node>=kMinValueLead ? valueResult(node) : USTRINGTRIE_NO_VALUE;
106b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho            }
107b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho            pos_=pos;
108b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho            return result;
109b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho        }
110b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho        --length;
111b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho        pos=skipValue(pos);
112b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho    } while(length>1);
113b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho    if(uchar==*pos++) {
114b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho        pos_=pos;
115b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho        int32_t node=*pos;
116b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho        return node>=kMinValueLead ? valueResult(node) : USTRINGTRIE_NO_VALUE;
117b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho    } else {
118b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho        stop();
119b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho        return USTRINGTRIE_NO_MATCH;
120b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho    }
121b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho}
122b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho
123b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2clairehoUStringTrieResult
124b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2clairehoUCharsTrie::nextImpl(const UChar *pos, int32_t uchar) {
125b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho    int32_t node=*pos++;
126b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho    for(;;) {
127b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho        if(node<kMinLinearMatch) {
128b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho            return branchNext(pos, node, uchar);
129b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho        } else if(node<kMinValueLead) {
130b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho            // Match the first of length+1 units.
131b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho            int32_t length=node-kMinLinearMatch;  // Actual match length minus 1.
132b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho            if(uchar==*pos++) {
133b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho                remainingMatchLength_=--length;
134b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho                pos_=pos;
135b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho                return (length<0 && (node=*pos)>=kMinValueLead) ?
136b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho                        valueResult(node) : USTRINGTRIE_NO_VALUE;
137b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho            } else {
138b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho                // No match.
139b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho                break;
140b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho            }
141b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho        } else if(node&kValueIsFinal) {
142b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho            // No further matching units.
143b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho            break;
144b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho        } else {
145b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho            // Skip intermediate value.
146b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho            pos=skipNodeValue(pos, node);
147b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho            node&=kNodeTypeMask;
148b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho        }
149b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho    }
150b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho    stop();
151b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho    return USTRINGTRIE_NO_MATCH;
152b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho}
153b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho
154b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2clairehoUStringTrieResult
155b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2clairehoUCharsTrie::next(int32_t uchar) {
156b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho    const UChar *pos=pos_;
157b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho    if(pos==NULL) {
158b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho        return USTRINGTRIE_NO_MATCH;
159b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho    }
160b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho    int32_t length=remainingMatchLength_;  // Actual remaining match length minus 1.
161b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho    if(length>=0) {
162b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho        // Remaining part of a linear-match node.
163b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho        if(uchar==*pos++) {
164b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho            remainingMatchLength_=--length;
165b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho            pos_=pos;
166b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho            int32_t node;
167b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho            return (length<0 && (node=*pos)>=kMinValueLead) ?
168b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho                    valueResult(node) : USTRINGTRIE_NO_VALUE;
169b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho        } else {
170b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho            stop();
171b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho            return USTRINGTRIE_NO_MATCH;
172b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho        }
173b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho    }
174b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho    return nextImpl(pos, uchar);
175b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho}
176b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho
177b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2clairehoUStringTrieResult
178b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2clairehoUCharsTrie::next(const UChar *s, int32_t sLength) {
179b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho    if(sLength<0 ? *s==0 : sLength==0) {
180b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho        // Empty input.
181b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho        return current();
182b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho    }
183b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho    const UChar *pos=pos_;
184b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho    if(pos==NULL) {
185b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho        return USTRINGTRIE_NO_MATCH;
186b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho    }
187b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho    int32_t length=remainingMatchLength_;  // Actual remaining match length minus 1.
188b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho    for(;;) {
189b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho        // Fetch the next input unit, if there is one.
190b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho        // Continue a linear-match node without rechecking sLength<0.
191b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho        int32_t uchar;
192b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho        if(sLength<0) {
193b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho            for(;;) {
194b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho                if((uchar=*s++)==0) {
195b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho                    remainingMatchLength_=length;
196b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho                    pos_=pos;
197b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho                    int32_t node;
198b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho                    return (length<0 && (node=*pos)>=kMinValueLead) ?
199b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho                            valueResult(node) : USTRINGTRIE_NO_VALUE;
200b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho                }
201b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho                if(length<0) {
202b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho                    remainingMatchLength_=length;
203b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho                    break;
204b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho                }
205b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho                if(uchar!=*pos) {
206b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho                    stop();
207b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho                    return USTRINGTRIE_NO_MATCH;
208b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho                }
209b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho                ++pos;
210b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho                --length;
211b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho            }
212b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho        } else {
213b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho            for(;;) {
214b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho                if(sLength==0) {
215b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho                    remainingMatchLength_=length;
216b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho                    pos_=pos;
217b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho                    int32_t node;
218b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho                    return (length<0 && (node=*pos)>=kMinValueLead) ?
219b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho                            valueResult(node) : USTRINGTRIE_NO_VALUE;
220b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho                }
221b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho                uchar=*s++;
222b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho                --sLength;
223b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho                if(length<0) {
224b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho                    remainingMatchLength_=length;
225b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho                    break;
226b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho                }
227b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho                if(uchar!=*pos) {
228b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho                    stop();
229b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho                    return USTRINGTRIE_NO_MATCH;
230b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho                }
231b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho                ++pos;
232b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho                --length;
233b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho            }
234b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho        }
235b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho        int32_t node=*pos++;
236b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho        for(;;) {
237b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho            if(node<kMinLinearMatch) {
238b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho                UStringTrieResult result=branchNext(pos, node, uchar);
239b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho                if(result==USTRINGTRIE_NO_MATCH) {
240b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho                    return USTRINGTRIE_NO_MATCH;
241b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho                }
242b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho                // Fetch the next input unit, if there is one.
243b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho                if(sLength<0) {
244b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho                    if((uchar=*s++)==0) {
245b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho                        return result;
246b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho                    }
247b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho                } else {
248b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho                    if(sLength==0) {
249b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho                        return result;
250b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho                    }
251b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho                    uchar=*s++;
252b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho                    --sLength;
253b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho                }
254b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho                if(result==USTRINGTRIE_FINAL_VALUE) {
255b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho                    // No further matching units.
256b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho                    stop();
257b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho                    return USTRINGTRIE_NO_MATCH;
258b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho                }
259b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho                pos=pos_;  // branchNext() advanced pos and wrote it to pos_ .
260b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho                node=*pos++;
261b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho            } else if(node<kMinValueLead) {
262b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho                // Match length+1 units.
263b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho                length=node-kMinLinearMatch;  // Actual match length minus 1.
264b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho                if(uchar!=*pos) {
265b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho                    stop();
266b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho                    return USTRINGTRIE_NO_MATCH;
267b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho                }
268b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho                ++pos;
269b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho                --length;
270b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho                break;
271b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho            } else if(node&kValueIsFinal) {
272b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho                // No further matching units.
273b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho                stop();
274b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho                return USTRINGTRIE_NO_MATCH;
275b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho            } else {
276b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho                // Skip intermediate value.
277b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho                pos=skipNodeValue(pos, node);
278b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho                node&=kNodeTypeMask;
279b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho            }
280b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho        }
281b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho    }
282b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho}
283b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho
284b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2clairehoconst UChar *
285b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2clairehoUCharsTrie::findUniqueValueFromBranch(const UChar *pos, int32_t length,
286b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho                                      UBool haveUniqueValue, int32_t &uniqueValue) {
287b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho    while(length>kMaxBranchLinearSubNodeLength) {
288b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho        ++pos;  // ignore the comparison unit
289b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho        if(NULL==findUniqueValueFromBranch(jumpByDelta(pos), length>>1, haveUniqueValue, uniqueValue)) {
290b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho            return NULL;
291b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho        }
292b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho        length=length-(length>>1);
293b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho        pos=skipDelta(pos);
294b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho    }
295b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho    do {
296b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho        ++pos;  // ignore a comparison unit
297b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho        // handle its value
298b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho        int32_t node=*pos++;
299b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho        UBool isFinal=(UBool)(node>>15);
300b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho        node&=0x7fff;
301b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho        int32_t value=readValue(pos, node);
302b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho        pos=skipValue(pos, node);
303b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho        if(isFinal) {
304b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho            if(haveUniqueValue) {
305b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho                if(value!=uniqueValue) {
306b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho                    return NULL;
307b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho                }
308b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho            } else {
309b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho                uniqueValue=value;
310b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho                haveUniqueValue=TRUE;
311b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho            }
312b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho        } else {
313b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho            if(!findUniqueValue(pos+value, haveUniqueValue, uniqueValue)) {
314b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho                return NULL;
315b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho            }
316b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho            haveUniqueValue=TRUE;
317b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho        }
318b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho    } while(--length>1);
319b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho    return pos+1;  // ignore the last comparison unit
320b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho}
321b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho
322b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2clairehoUBool
323b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2clairehoUCharsTrie::findUniqueValue(const UChar *pos, UBool haveUniqueValue, int32_t &uniqueValue) {
324b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho    int32_t node=*pos++;
325b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho    for(;;) {
326b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho        if(node<kMinLinearMatch) {
327b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho            if(node==0) {
328b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho                node=*pos++;
329b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho            }
330b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho            pos=findUniqueValueFromBranch(pos, node+1, haveUniqueValue, uniqueValue);
331b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho            if(pos==NULL) {
332b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho                return FALSE;
333b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho            }
334b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho            haveUniqueValue=TRUE;
335b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho            node=*pos++;
336b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho        } else if(node<kMinValueLead) {
337b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho            // linear-match node
338b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho            pos+=node-kMinLinearMatch+1;  // Ignore the match units.
339b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho            node=*pos++;
340b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho        } else {
341b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho            UBool isFinal=(UBool)(node>>15);
342b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho            int32_t value;
343b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho            if(isFinal) {
344b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho                value=readValue(pos, node&0x7fff);
345b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho            } else {
346b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho                value=readNodeValue(pos, node);
347b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho            }
348b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho            if(haveUniqueValue) {
349b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho                if(value!=uniqueValue) {
350b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho                    return FALSE;
351b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho                }
352b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho            } else {
353b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho                uniqueValue=value;
354b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho                haveUniqueValue=TRUE;
355b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho            }
356b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho            if(isFinal) {
357b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho                return TRUE;
358b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho            }
359b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho            pos=skipNodeValue(pos, node);
360b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho            node&=kNodeTypeMask;
361b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho        }
362b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho    }
363b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho}
364b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho
365b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2clairehoint32_t
366b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2clairehoUCharsTrie::getNextUChars(Appendable &out) const {
367b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho    const UChar *pos=pos_;
368b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho    if(pos==NULL) {
369b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho        return 0;
370b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho    }
371b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho    if(remainingMatchLength_>=0) {
372b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho        out.appendCodeUnit(*pos);  // Next unit of a pending linear-match node.
373b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho        return 1;
374b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho    }
375b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho    int32_t node=*pos++;
376b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho    if(node>=kMinValueLead) {
377b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho        if(node&kValueIsFinal) {
378b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho            return 0;
379b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho        } else {
380b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho            pos=skipNodeValue(pos, node);
381b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho            node&=kNodeTypeMask;
382b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho        }
383b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho    }
384b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho    if(node<kMinLinearMatch) {
385b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho        if(node==0) {
386b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho            node=*pos++;
387b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho        }
388b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho        out.reserveAppendCapacity(++node);
389b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho        getNextBranchUChars(pos, node, out);
390b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho        return node;
391b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho    } else {
392b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho        // First unit of the linear-match node.
393b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho        out.appendCodeUnit(*pos);
394b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho        return 1;
395b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho    }
396b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho}
397b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho
398b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2clairehovoid
399b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2clairehoUCharsTrie::getNextBranchUChars(const UChar *pos, int32_t length, Appendable &out) {
400b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho    while(length>kMaxBranchLinearSubNodeLength) {
401b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho        ++pos;  // ignore the comparison unit
402b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho        getNextBranchUChars(jumpByDelta(pos), length>>1, out);
403b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho        length=length-(length>>1);
404b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho        pos=skipDelta(pos);
405b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho    }
406b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho    do {
407b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho        out.appendCodeUnit(*pos++);
408b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho        pos=skipValue(pos);
409b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho    } while(--length>1);
410b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho    out.appendCodeUnit(*pos);
411b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho}
412b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho
413b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2clairehoU_NAMESPACE_END
414