1/*
2*******************************************************************************
3*   Copyright (C) 2010-2011, International Business Machines
4*   Corporation and others.  All Rights Reserved.
5*******************************************************************************
6*   file name:  ucharstrieiterator.h
7*   encoding:   US-ASCII
8*   tab size:   8 (not used)
9*   indentation:4
10*
11*   created on: 2010nov15
12*   created by: Markus W. Scherer
13*/
14
15#include "unicode/utypes.h"
16#include "unicode/ucharstrie.h"
17#include "unicode/unistr.h"
18#include "uvectr32.h"
19
20U_NAMESPACE_BEGIN
21
22UCharsTrie::Iterator::Iterator(const UChar *trieUChars, int32_t maxStringLength,
23                               UErrorCode &errorCode)
24        : uchars_(trieUChars),
25          pos_(uchars_), initialPos_(uchars_),
26          remainingMatchLength_(-1), initialRemainingMatchLength_(-1),
27          skipValue_(FALSE),
28          maxLength_(maxStringLength), value_(0), stack_(NULL) {
29    if(U_FAILURE(errorCode)) {
30        return;
31    }
32    // stack_ is a pointer so that it's easy to turn ucharstrie.h into
33    // a public API header for which we would want it to depend only on
34    // other public headers.
35    // Unlike UCharsTrie itself, its Iterator performs memory allocations anyway
36    // via the UnicodeString and UVector32 implementations, so this additional
37    // cost is minimal.
38    stack_=new UVector32(errorCode);
39    if(stack_==NULL) {
40        errorCode=U_MEMORY_ALLOCATION_ERROR;
41    }
42}
43
44UCharsTrie::Iterator::Iterator(const UCharsTrie &trie, int32_t maxStringLength,
45                               UErrorCode &errorCode)
46        : uchars_(trie.uchars_), pos_(trie.pos_), initialPos_(trie.pos_),
47          remainingMatchLength_(trie.remainingMatchLength_),
48          initialRemainingMatchLength_(trie.remainingMatchLength_),
49          skipValue_(FALSE),
50          maxLength_(maxStringLength), value_(0), stack_(NULL) {
51    if(U_FAILURE(errorCode)) {
52        return;
53    }
54    stack_=new UVector32(errorCode);
55    if(U_FAILURE(errorCode)) {
56        return;
57    }
58    if(stack_==NULL) {
59        errorCode=U_MEMORY_ALLOCATION_ERROR;
60        return;
61    }
62    int32_t length=remainingMatchLength_;  // Actual remaining match length minus 1.
63    if(length>=0) {
64        // Pending linear-match node, append remaining UChars to str_.
65        ++length;
66        if(maxLength_>0 && length>maxLength_) {
67            length=maxLength_;  // This will leave remainingMatchLength>=0 as a signal.
68        }
69        str_.append(pos_, length);
70        pos_+=length;
71        remainingMatchLength_-=length;
72    }
73}
74
75UCharsTrie::Iterator::~Iterator() {
76    delete stack_;
77}
78
79UCharsTrie::Iterator &
80UCharsTrie::Iterator::reset() {
81    pos_=initialPos_;
82    remainingMatchLength_=initialRemainingMatchLength_;
83    skipValue_=FALSE;
84    int32_t length=remainingMatchLength_+1;  // Remaining match length.
85    if(maxLength_>0 && length>maxLength_) {
86        length=maxLength_;
87    }
88    str_.truncate(length);
89    pos_+=length;
90    remainingMatchLength_-=length;
91    stack_->setSize(0);
92    return *this;
93}
94
95UBool
96UCharsTrie::Iterator::hasNext() const { return pos_!=NULL || !stack_->isEmpty(); }
97
98UBool
99UCharsTrie::Iterator::next(UErrorCode &errorCode) {
100    if(U_FAILURE(errorCode)) {
101        return FALSE;
102    }
103    const UChar *pos=pos_;
104    if(pos==NULL) {
105        if(stack_->isEmpty()) {
106            return FALSE;
107        }
108        // Pop the state off the stack and continue with the next outbound edge of
109        // the branch node.
110        int32_t stackSize=stack_->size();
111        int32_t length=stack_->elementAti(stackSize-1);
112        pos=uchars_+stack_->elementAti(stackSize-2);
113        stack_->setSize(stackSize-2);
114        str_.truncate(length&0xffff);
115        length=(int32_t)((uint32_t)length>>16);
116        if(length>1) {
117            pos=branchNext(pos, length, errorCode);
118            if(pos==NULL) {
119                return TRUE;  // Reached a final value.
120            }
121        } else {
122            str_.append(*pos++);
123        }
124    }
125    if(remainingMatchLength_>=0) {
126        // We only get here if we started in a pending linear-match node
127        // with more than maxLength remaining units.
128        return truncateAndStop();
129    }
130    for(;;) {
131        int32_t node=*pos++;
132        if(node>=kMinValueLead) {
133            if(skipValue_) {
134                pos=skipNodeValue(pos, node);
135                node&=kNodeTypeMask;
136                skipValue_=FALSE;
137            } else {
138                // Deliver value for the string so far.
139                UBool isFinal=(UBool)(node>>15);
140                if(isFinal) {
141                    value_=readValue(pos, node&0x7fff);
142                } else {
143                    value_=readNodeValue(pos, node);
144                }
145                if(isFinal || (maxLength_>0 && str_.length()==maxLength_)) {
146                    pos_=NULL;
147                } else {
148                    // We cannot skip the value right here because it shares its
149                    // lead unit with a match node which we have to evaluate
150                    // next time.
151                    // Instead, keep pos_ on the node lead unit itself.
152                    pos_=pos-1;
153                    skipValue_=TRUE;
154                }
155                return TRUE;
156            }
157        }
158        if(maxLength_>0 && str_.length()==maxLength_) {
159            return truncateAndStop();
160        }
161        if(node<kMinLinearMatch) {
162            if(node==0) {
163                node=*pos++;
164            }
165            pos=branchNext(pos, node+1, errorCode);
166            if(pos==NULL) {
167                return TRUE;  // Reached a final value.
168            }
169        } else {
170            // Linear-match node, append length units to str_.
171            int32_t length=node-kMinLinearMatch+1;
172            if(maxLength_>0 && str_.length()+length>maxLength_) {
173                str_.append(pos, maxLength_-str_.length());
174                return truncateAndStop();
175            }
176            str_.append(pos, length);
177            pos+=length;
178        }
179    }
180}
181
182// Branch node, needs to take the first outbound edge and push state for the rest.
183const UChar *
184UCharsTrie::Iterator::branchNext(const UChar *pos, int32_t length, UErrorCode &errorCode) {
185    while(length>kMaxBranchLinearSubNodeLength) {
186        ++pos;  // ignore the comparison unit
187        // Push state for the greater-or-equal edge.
188        stack_->addElement((int32_t)(skipDelta(pos)-uchars_), errorCode);
189        stack_->addElement(((length-(length>>1))<<16)|str_.length(), errorCode);
190        // Follow the less-than edge.
191        length>>=1;
192        pos=jumpByDelta(pos);
193    }
194    // List of key-value pairs where values are either final values or jump deltas.
195    // Read the first (key, value) pair.
196    UChar trieUnit=*pos++;
197    int32_t node=*pos++;
198    UBool isFinal=(UBool)(node>>15);
199    int32_t value=readValue(pos, node&=0x7fff);
200    pos=skipValue(pos, node);
201    stack_->addElement((int32_t)(pos-uchars_), errorCode);
202    stack_->addElement(((length-1)<<16)|str_.length(), errorCode);
203    str_.append(trieUnit);
204    if(isFinal) {
205        pos_=NULL;
206        value_=value;
207        return NULL;
208    } else {
209        return pos+value;
210    }
211}
212
213U_NAMESPACE_END
214