1/*
2*******************************************************************************
3*   Copyright (C) 2010-2012, International Business Machines
4*   Corporation and others.  All Rights Reserved.
5*******************************************************************************
6*   file name:  bytestrieiterator.cpp
7*   encoding:   US-ASCII
8*   tab size:   8 (not used)
9*   indentation:4
10*
11*   created on: 2010nov03
12*   created by: Markus W. Scherer
13*/
14
15#include "unicode/utypes.h"
16#include "unicode/bytestrie.h"
17#include "unicode/stringpiece.h"
18#include "charstr.h"
19#include "uvectr32.h"
20
21U_NAMESPACE_BEGIN
22
23BytesTrie::Iterator::Iterator(const void *trieBytes, int32_t maxStringLength,
24                              UErrorCode &errorCode)
25        : bytes_(static_cast<const uint8_t *>(trieBytes)),
26          pos_(bytes_), initialPos_(bytes_),
27          remainingMatchLength_(-1), initialRemainingMatchLength_(-1),
28          str_(NULL), maxLength_(maxStringLength), value_(0), stack_(NULL) {
29    if(U_FAILURE(errorCode)) {
30        return;
31    }
32    // str_ and stack_ are pointers so that it's easy to turn bytestrie.h into
33    // a public API header for which we would want it to depend only on
34    // other public headers.
35    // Unlike BytesTrie itself, its Iterator performs memory allocations anyway
36    // via the CharString and UVector32 implementations, so this additional
37    // cost is minimal.
38    str_=new CharString();
39    stack_=new UVector32(errorCode);
40    if(U_SUCCESS(errorCode) && (str_==NULL || stack_==NULL)) {
41        errorCode=U_MEMORY_ALLOCATION_ERROR;
42    }
43}
44
45BytesTrie::Iterator::Iterator(const BytesTrie &trie, int32_t maxStringLength,
46                              UErrorCode &errorCode)
47        : bytes_(trie.bytes_), pos_(trie.pos_), initialPos_(trie.pos_),
48          remainingMatchLength_(trie.remainingMatchLength_),
49          initialRemainingMatchLength_(trie.remainingMatchLength_),
50          str_(NULL), maxLength_(maxStringLength), value_(0), stack_(NULL) {
51    if(U_FAILURE(errorCode)) {
52        return;
53    }
54    str_=new CharString();
55    stack_=new UVector32(errorCode);
56    if(U_FAILURE(errorCode)) {
57        return;
58    }
59    if(str_==NULL || stack_==NULL) {
60        errorCode=U_MEMORY_ALLOCATION_ERROR;
61        return;
62    }
63    int32_t length=remainingMatchLength_;  // Actual remaining match length minus 1.
64    if(length>=0) {
65        // Pending linear-match node, append remaining bytes to str_.
66        ++length;
67        if(maxLength_>0 && length>maxLength_) {
68            length=maxLength_;  // This will leave remainingMatchLength>=0 as a signal.
69        }
70        str_->append(reinterpret_cast<const char *>(pos_), length, errorCode);
71        pos_+=length;
72        remainingMatchLength_-=length;
73    }
74}
75
76BytesTrie::Iterator::~Iterator() {
77    delete str_;
78    delete stack_;
79}
80
81BytesTrie::Iterator &
82BytesTrie::Iterator::reset() {
83    pos_=initialPos_;
84    remainingMatchLength_=initialRemainingMatchLength_;
85    int32_t length=remainingMatchLength_+1;  // Remaining match length.
86    if(maxLength_>0 && length>maxLength_) {
87        length=maxLength_;
88    }
89    str_->truncate(length);
90    pos_+=length;
91    remainingMatchLength_-=length;
92    stack_->setSize(0);
93    return *this;
94}
95
96UBool
97BytesTrie::Iterator::hasNext() const { return pos_!=NULL || !stack_->isEmpty(); }
98
99UBool
100BytesTrie::Iterator::next(UErrorCode &errorCode) {
101    if(U_FAILURE(errorCode)) {
102        return FALSE;
103    }
104    const uint8_t *pos=pos_;
105    if(pos==NULL) {
106        if(stack_->isEmpty()) {
107            return FALSE;
108        }
109        // Pop the state off the stack and continue with the next outbound edge of
110        // the branch node.
111        int32_t stackSize=stack_->size();
112        int32_t length=stack_->elementAti(stackSize-1);
113        pos=bytes_+stack_->elementAti(stackSize-2);
114        stack_->setSize(stackSize-2);
115        str_->truncate(length&0xffff);
116        length=(int32_t)((uint32_t)length>>16);
117        if(length>1) {
118            pos=branchNext(pos, length, errorCode);
119            if(pos==NULL) {
120                return TRUE;  // Reached a final value.
121            }
122        } else {
123            str_->append((char)*pos++, errorCode);
124        }
125    }
126    if(remainingMatchLength_>=0) {
127        // We only get here if we started in a pending linear-match node
128        // with more than maxLength remaining bytes.
129        return truncateAndStop();
130    }
131    for(;;) {
132        int32_t node=*pos++;
133        if(node>=kMinValueLead) {
134            // Deliver value for the byte sequence so far.
135            UBool isFinal=(UBool)(node&kValueIsFinal);
136            value_=readValue(pos, node>>1);
137            if(isFinal || (maxLength_>0 && str_->length()==maxLength_)) {
138                pos_=NULL;
139            } else {
140                pos_=skipValue(pos, node);
141            }
142            sp_.set(str_->data(), str_->length());
143            return TRUE;
144        }
145        if(maxLength_>0 && str_->length()==maxLength_) {
146            return truncateAndStop();
147        }
148        if(node<kMinLinearMatch) {
149            if(node==0) {
150                node=*pos++;
151            }
152            pos=branchNext(pos, node+1, errorCode);
153            if(pos==NULL) {
154                return TRUE;  // Reached a final value.
155            }
156        } else {
157            // Linear-match node, append length bytes to str_.
158            int32_t length=node-kMinLinearMatch+1;
159            if(maxLength_>0 && str_->length()+length>maxLength_) {
160                str_->append(reinterpret_cast<const char *>(pos),
161                            maxLength_-str_->length(), errorCode);
162                return truncateAndStop();
163            }
164            str_->append(reinterpret_cast<const char *>(pos), length, errorCode);
165            pos+=length;
166        }
167    }
168}
169
170UBool
171BytesTrie::Iterator::truncateAndStop() {
172    pos_=NULL;
173    sp_.set(str_->data(), str_->length());
174    value_=-1;  // no real value for str
175    return TRUE;
176}
177
178// Branch node, needs to take the first outbound edge and push state for the rest.
179const uint8_t *
180BytesTrie::Iterator::branchNext(const uint8_t *pos, int32_t length, UErrorCode &errorCode) {
181    while(length>kMaxBranchLinearSubNodeLength) {
182        ++pos;  // ignore the comparison byte
183        // Push state for the greater-or-equal edge.
184        stack_->addElement((int32_t)(skipDelta(pos)-bytes_), errorCode);
185        stack_->addElement(((length-(length>>1))<<16)|str_->length(), errorCode);
186        // Follow the less-than edge.
187        length>>=1;
188        pos=jumpByDelta(pos);
189    }
190    // List of key-value pairs where values are either final values or jump deltas.
191    // Read the first (key, value) pair.
192    uint8_t trieByte=*pos++;
193    int32_t node=*pos++;
194    UBool isFinal=(UBool)(node&kValueIsFinal);
195    int32_t value=readValue(pos, node>>1);
196    pos=skipValue(pos, node);
197    stack_->addElement((int32_t)(pos-bytes_), errorCode);
198    stack_->addElement(((length-1)<<16)|str_->length(), errorCode);
199    str_->append((char)trieByte, errorCode);
200    if(isFinal) {
201        pos_=NULL;
202        sp_.set(str_->data(), str_->length());
203        value_=value;
204        return NULL;
205    } else {
206        return pos+value;
207    }
208}
209
210U_NAMESPACE_END
211