1// Copyright (C) 2016 and later: Unicode, Inc. and others. 2// License & terms of use: http://www.unicode.org/copyright.html 3/* 4******************************************************************************* 5* Copyright (C) 2010-2012, International Business Machines 6* Corporation and others. All Rights Reserved. 7******************************************************************************* 8* file name: bytestrieiterator.cpp 9* encoding: US-ASCII 10* tab size: 8 (not used) 11* indentation:4 12* 13* created on: 2010nov03 14* created by: Markus W. Scherer 15*/ 16 17#include "unicode/utypes.h" 18#include "unicode/bytestrie.h" 19#include "unicode/stringpiece.h" 20#include "charstr.h" 21#include "uvectr32.h" 22 23U_NAMESPACE_BEGIN 24 25BytesTrie::Iterator::Iterator(const void *trieBytes, int32_t maxStringLength, 26 UErrorCode &errorCode) 27 : bytes_(static_cast<const uint8_t *>(trieBytes)), 28 pos_(bytes_), initialPos_(bytes_), 29 remainingMatchLength_(-1), initialRemainingMatchLength_(-1), 30 str_(NULL), maxLength_(maxStringLength), value_(0), stack_(NULL) { 31 if(U_FAILURE(errorCode)) { 32 return; 33 } 34 // str_ and stack_ are pointers so that it's easy to turn bytestrie.h into 35 // a public API header for which we would want it to depend only on 36 // other public headers. 37 // Unlike BytesTrie itself, its Iterator performs memory allocations anyway 38 // via the CharString and UVector32 implementations, so this additional 39 // cost is minimal. 40 str_=new CharString(); 41 stack_=new UVector32(errorCode); 42 if(U_SUCCESS(errorCode) && (str_==NULL || stack_==NULL)) { 43 errorCode=U_MEMORY_ALLOCATION_ERROR; 44 } 45} 46 47BytesTrie::Iterator::Iterator(const BytesTrie &trie, int32_t maxStringLength, 48 UErrorCode &errorCode) 49 : bytes_(trie.bytes_), pos_(trie.pos_), initialPos_(trie.pos_), 50 remainingMatchLength_(trie.remainingMatchLength_), 51 initialRemainingMatchLength_(trie.remainingMatchLength_), 52 str_(NULL), maxLength_(maxStringLength), value_(0), stack_(NULL) { 53 if(U_FAILURE(errorCode)) { 54 return; 55 } 56 str_=new CharString(); 57 stack_=new UVector32(errorCode); 58 if(U_FAILURE(errorCode)) { 59 return; 60 } 61 if(str_==NULL || stack_==NULL) { 62 errorCode=U_MEMORY_ALLOCATION_ERROR; 63 return; 64 } 65 int32_t length=remainingMatchLength_; // Actual remaining match length minus 1. 66 if(length>=0) { 67 // Pending linear-match node, append remaining bytes to str_. 68 ++length; 69 if(maxLength_>0 && length>maxLength_) { 70 length=maxLength_; // This will leave remainingMatchLength>=0 as a signal. 71 } 72 str_->append(reinterpret_cast<const char *>(pos_), length, errorCode); 73 pos_+=length; 74 remainingMatchLength_-=length; 75 } 76} 77 78BytesTrie::Iterator::~Iterator() { 79 delete str_; 80 delete stack_; 81} 82 83BytesTrie::Iterator & 84BytesTrie::Iterator::reset() { 85 pos_=initialPos_; 86 remainingMatchLength_=initialRemainingMatchLength_; 87 int32_t length=remainingMatchLength_+1; // Remaining match length. 88 if(maxLength_>0 && length>maxLength_) { 89 length=maxLength_; 90 } 91 str_->truncate(length); 92 pos_+=length; 93 remainingMatchLength_-=length; 94 stack_->setSize(0); 95 return *this; 96} 97 98UBool 99BytesTrie::Iterator::hasNext() const { return pos_!=NULL || !stack_->isEmpty(); } 100 101UBool 102BytesTrie::Iterator::next(UErrorCode &errorCode) { 103 if(U_FAILURE(errorCode)) { 104 return FALSE; 105 } 106 const uint8_t *pos=pos_; 107 if(pos==NULL) { 108 if(stack_->isEmpty()) { 109 return FALSE; 110 } 111 // Pop the state off the stack and continue with the next outbound edge of 112 // the branch node. 113 int32_t stackSize=stack_->size(); 114 int32_t length=stack_->elementAti(stackSize-1); 115 pos=bytes_+stack_->elementAti(stackSize-2); 116 stack_->setSize(stackSize-2); 117 str_->truncate(length&0xffff); 118 length=(int32_t)((uint32_t)length>>16); 119 if(length>1) { 120 pos=branchNext(pos, length, errorCode); 121 if(pos==NULL) { 122 return TRUE; // Reached a final value. 123 } 124 } else { 125 str_->append((char)*pos++, errorCode); 126 } 127 } 128 if(remainingMatchLength_>=0) { 129 // We only get here if we started in a pending linear-match node 130 // with more than maxLength remaining bytes. 131 return truncateAndStop(); 132 } 133 for(;;) { 134 int32_t node=*pos++; 135 if(node>=kMinValueLead) { 136 // Deliver value for the byte sequence so far. 137 UBool isFinal=(UBool)(node&kValueIsFinal); 138 value_=readValue(pos, node>>1); 139 if(isFinal || (maxLength_>0 && str_->length()==maxLength_)) { 140 pos_=NULL; 141 } else { 142 pos_=skipValue(pos, node); 143 } 144 return TRUE; 145 } 146 if(maxLength_>0 && str_->length()==maxLength_) { 147 return truncateAndStop(); 148 } 149 if(node<kMinLinearMatch) { 150 if(node==0) { 151 node=*pos++; 152 } 153 pos=branchNext(pos, node+1, errorCode); 154 if(pos==NULL) { 155 return TRUE; // Reached a final value. 156 } 157 } else { 158 // Linear-match node, append length bytes to str_. 159 int32_t length=node-kMinLinearMatch+1; 160 if(maxLength_>0 && str_->length()+length>maxLength_) { 161 str_->append(reinterpret_cast<const char *>(pos), 162 maxLength_-str_->length(), errorCode); 163 return truncateAndStop(); 164 } 165 str_->append(reinterpret_cast<const char *>(pos), length, errorCode); 166 pos+=length; 167 } 168 } 169} 170 171StringPiece 172BytesTrie::Iterator::getString() const { 173 return str_ == NULL ? StringPiece() : str_->toStringPiece(); 174} 175 176UBool 177BytesTrie::Iterator::truncateAndStop() { 178 pos_=NULL; 179 value_=-1; // no real value for str 180 return TRUE; 181} 182 183// Branch node, needs to take the first outbound edge and push state for the rest. 184const uint8_t * 185BytesTrie::Iterator::branchNext(const uint8_t *pos, int32_t length, UErrorCode &errorCode) { 186 while(length>kMaxBranchLinearSubNodeLength) { 187 ++pos; // ignore the comparison byte 188 // Push state for the greater-or-equal edge. 189 stack_->addElement((int32_t)(skipDelta(pos)-bytes_), errorCode); 190 stack_->addElement(((length-(length>>1))<<16)|str_->length(), errorCode); 191 // Follow the less-than edge. 192 length>>=1; 193 pos=jumpByDelta(pos); 194 } 195 // List of key-value pairs where values are either final values or jump deltas. 196 // Read the first (key, value) pair. 197 uint8_t trieByte=*pos++; 198 int32_t node=*pos++; 199 UBool isFinal=(UBool)(node&kValueIsFinal); 200 int32_t value=readValue(pos, node>>1); 201 pos=skipValue(pos, node); 202 stack_->addElement((int32_t)(pos-bytes_), errorCode); 203 stack_->addElement(((length-1)<<16)|str_->length(), errorCode); 204 str_->append((char)trieByte, errorCode); 205 if(isFinal) { 206 pos_=NULL; 207 value_=value; 208 return NULL; 209 } else { 210 return pos+value; 211 } 212} 213 214U_NAMESPACE_END 215