164339d36f8bd4db5025fe2988eda22b491a9219cFredrik Roubert// Copyright (C) 2016 and later: Unicode, Inc. and others. 264339d36f8bd4db5025fe2988eda22b491a9219cFredrik Roubert// License & terms of use: http://www.unicode.org/copyright.html 3b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho/* 4b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho******************************************************************************* 5b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho* Copyright (C) 2010-2011, International Business Machines 6b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho* Corporation and others. All Rights Reserved. 7b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho******************************************************************************* 8b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho* file name: ucharstrie.h 9b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho* encoding: US-ASCII 10b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho* tab size: 8 (not used) 11b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho* indentation:4 12b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho* 13b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho* created on: 2010nov14 14b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho* created by: Markus W. Scherer 15b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho*/ 16b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho 17b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho#include "unicode/utypes.h" 18b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho#include "unicode/appendable.h" 19b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho#include "unicode/ucharstrie.h" 20b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho#include "unicode/uobject.h" 2183a171d1a62abf406f7f44ae671823d5ec20db7dCraig Cornelius#include "unicode/utf16.h" 22b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho#include "cmemory.h" 23b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho#include "uassert.h" 24b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho 25b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2clairehoU_NAMESPACE_BEGIN 26b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho 27b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2clairehoUCharsTrie::~UCharsTrie() { 28b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho uprv_free(ownedArray_); 29b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho} 30b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho 31b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2clairehoUStringTrieResult 32b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2clairehoUCharsTrie::current() const { 33b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho const UChar *pos=pos_; 34b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho if(pos==NULL) { 35b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho return USTRINGTRIE_NO_MATCH; 36b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho } else { 37b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho int32_t node; 38b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho return (remainingMatchLength_<0 && (node=*pos)>=kMinValueLead) ? 39b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho valueResult(node) : USTRINGTRIE_NO_VALUE; 40b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho } 41b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho} 42b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho 43b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2clairehoUStringTrieResult 4483a171d1a62abf406f7f44ae671823d5ec20db7dCraig CorneliusUCharsTrie::firstForCodePoint(UChar32 cp) { 4583a171d1a62abf406f7f44ae671823d5ec20db7dCraig Cornelius return cp<=0xffff ? 4683a171d1a62abf406f7f44ae671823d5ec20db7dCraig Cornelius first(cp) : 4783a171d1a62abf406f7f44ae671823d5ec20db7dCraig Cornelius (USTRINGTRIE_HAS_NEXT(first(U16_LEAD(cp))) ? 4883a171d1a62abf406f7f44ae671823d5ec20db7dCraig Cornelius next(U16_TRAIL(cp)) : 4983a171d1a62abf406f7f44ae671823d5ec20db7dCraig Cornelius USTRINGTRIE_NO_MATCH); 5083a171d1a62abf406f7f44ae671823d5ec20db7dCraig Cornelius} 5183a171d1a62abf406f7f44ae671823d5ec20db7dCraig Cornelius 5283a171d1a62abf406f7f44ae671823d5ec20db7dCraig CorneliusUStringTrieResult 5383a171d1a62abf406f7f44ae671823d5ec20db7dCraig CorneliusUCharsTrie::nextForCodePoint(UChar32 cp) { 5483a171d1a62abf406f7f44ae671823d5ec20db7dCraig Cornelius return cp<=0xffff ? 5583a171d1a62abf406f7f44ae671823d5ec20db7dCraig Cornelius next(cp) : 5683a171d1a62abf406f7f44ae671823d5ec20db7dCraig Cornelius (USTRINGTRIE_HAS_NEXT(next(U16_LEAD(cp))) ? 5783a171d1a62abf406f7f44ae671823d5ec20db7dCraig Cornelius next(U16_TRAIL(cp)) : 5883a171d1a62abf406f7f44ae671823d5ec20db7dCraig Cornelius USTRINGTRIE_NO_MATCH); 5983a171d1a62abf406f7f44ae671823d5ec20db7dCraig Cornelius} 6083a171d1a62abf406f7f44ae671823d5ec20db7dCraig Cornelius 6183a171d1a62abf406f7f44ae671823d5ec20db7dCraig CorneliusUStringTrieResult 62b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2clairehoUCharsTrie::branchNext(const UChar *pos, int32_t length, int32_t uchar) { 63b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho // Branch according to the current unit. 64b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho if(length==0) { 65b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho length=*pos++; 66b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho } 67b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho ++length; 68b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho // The length of the branch is the number of units to select from. 69b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho // The data structure encodes a binary search. 70b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho while(length>kMaxBranchLinearSubNodeLength) { 71b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho if(uchar<*pos++) { 72b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho length>>=1; 73b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho pos=jumpByDelta(pos); 74b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho } else { 75b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho length=length-(length>>1); 76b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho pos=skipDelta(pos); 77b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho } 78b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho } 79b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho // Drop down to linear search for the last few units. 80b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho // length>=2 because the loop body above sees length>kMaxBranchLinearSubNodeLength>=3 81b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho // and divides length by 2. 82b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho do { 83b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho if(uchar==*pos++) { 84b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho UStringTrieResult result; 85b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho int32_t node=*pos; 86b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho if(node&kValueIsFinal) { 87b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho // Leave the final value for getValue() to read. 88b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho result=USTRINGTRIE_FINAL_VALUE; 89b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho } else { 90b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho // Use the non-final value as the jump delta. 91b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho ++pos; 92b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho // int32_t delta=readValue(pos, node); 93b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho int32_t delta; 94b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho if(node<kMinTwoUnitValueLead) { 95b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho delta=node; 96b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho } else if(node<kThreeUnitValueLead) { 97b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho delta=((node-kMinTwoUnitValueLead)<<16)|*pos++; 98b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho } else { 99b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho delta=(pos[0]<<16)|pos[1]; 100b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho pos+=2; 101b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho } 102b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho // end readValue() 103b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho pos+=delta; 104b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho node=*pos; 105b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho result= node>=kMinValueLead ? valueResult(node) : USTRINGTRIE_NO_VALUE; 106b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho } 107b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho pos_=pos; 108b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho return result; 109b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho } 110b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho --length; 111b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho pos=skipValue(pos); 112b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho } while(length>1); 113b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho if(uchar==*pos++) { 114b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho pos_=pos; 115b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho int32_t node=*pos; 116b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho return node>=kMinValueLead ? valueResult(node) : USTRINGTRIE_NO_VALUE; 117b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho } else { 118b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho stop(); 119b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho return USTRINGTRIE_NO_MATCH; 120b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho } 121b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho} 122b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho 123b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2clairehoUStringTrieResult 124b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2clairehoUCharsTrie::nextImpl(const UChar *pos, int32_t uchar) { 125b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho int32_t node=*pos++; 126b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho for(;;) { 127b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho if(node<kMinLinearMatch) { 128b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho return branchNext(pos, node, uchar); 129b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho } else if(node<kMinValueLead) { 130b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho // Match the first of length+1 units. 131b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho int32_t length=node-kMinLinearMatch; // Actual match length minus 1. 132b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho if(uchar==*pos++) { 133b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho remainingMatchLength_=--length; 134b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho pos_=pos; 135b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho return (length<0 && (node=*pos)>=kMinValueLead) ? 136b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho valueResult(node) : USTRINGTRIE_NO_VALUE; 137b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho } else { 138b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho // No match. 139b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho break; 140b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho } 141b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho } else if(node&kValueIsFinal) { 142b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho // No further matching units. 143b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho break; 144b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho } else { 145b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho // Skip intermediate value. 146b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho pos=skipNodeValue(pos, node); 147b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho node&=kNodeTypeMask; 148b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho } 149b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho } 150b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho stop(); 151b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho return USTRINGTRIE_NO_MATCH; 152b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho} 153b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho 154b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2clairehoUStringTrieResult 155b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2clairehoUCharsTrie::next(int32_t uchar) { 156b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho const UChar *pos=pos_; 157b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho if(pos==NULL) { 158b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho return USTRINGTRIE_NO_MATCH; 159b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho } 160b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho int32_t length=remainingMatchLength_; // Actual remaining match length minus 1. 161b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho if(length>=0) { 162b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho // Remaining part of a linear-match node. 163b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho if(uchar==*pos++) { 164b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho remainingMatchLength_=--length; 165b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho pos_=pos; 166b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho int32_t node; 167b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho return (length<0 && (node=*pos)>=kMinValueLead) ? 168b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho valueResult(node) : USTRINGTRIE_NO_VALUE; 169b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho } else { 170b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho stop(); 171b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho return USTRINGTRIE_NO_MATCH; 172b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho } 173b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho } 174b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho return nextImpl(pos, uchar); 175b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho} 176b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho 177b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2clairehoUStringTrieResult 178b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2clairehoUCharsTrie::next(const UChar *s, int32_t sLength) { 179b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho if(sLength<0 ? *s==0 : sLength==0) { 180b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho // Empty input. 181b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho return current(); 182b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho } 183b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho const UChar *pos=pos_; 184b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho if(pos==NULL) { 185b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho return USTRINGTRIE_NO_MATCH; 186b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho } 187b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho int32_t length=remainingMatchLength_; // Actual remaining match length minus 1. 188b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho for(;;) { 189b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho // Fetch the next input unit, if there is one. 190b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho // Continue a linear-match node without rechecking sLength<0. 191b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho int32_t uchar; 192b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho if(sLength<0) { 193b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho for(;;) { 194b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho if((uchar=*s++)==0) { 195b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho remainingMatchLength_=length; 196b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho pos_=pos; 197b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho int32_t node; 198b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho return (length<0 && (node=*pos)>=kMinValueLead) ? 199b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho valueResult(node) : USTRINGTRIE_NO_VALUE; 200b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho } 201b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho if(length<0) { 202b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho remainingMatchLength_=length; 203b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho break; 204b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho } 205b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho if(uchar!=*pos) { 206b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho stop(); 207b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho return USTRINGTRIE_NO_MATCH; 208b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho } 209b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho ++pos; 210b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho --length; 211b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho } 212b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho } else { 213b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho for(;;) { 214b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho if(sLength==0) { 215b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho remainingMatchLength_=length; 216b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho pos_=pos; 217b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho int32_t node; 218b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho return (length<0 && (node=*pos)>=kMinValueLead) ? 219b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho valueResult(node) : USTRINGTRIE_NO_VALUE; 220b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho } 221b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho uchar=*s++; 222b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho --sLength; 223b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho if(length<0) { 224b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho remainingMatchLength_=length; 225b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho break; 226b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho } 227b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho if(uchar!=*pos) { 228b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho stop(); 229b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho return USTRINGTRIE_NO_MATCH; 230b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho } 231b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho ++pos; 232b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho --length; 233b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho } 234b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho } 235b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho int32_t node=*pos++; 236b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho for(;;) { 237b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho if(node<kMinLinearMatch) { 238b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho UStringTrieResult result=branchNext(pos, node, uchar); 239b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho if(result==USTRINGTRIE_NO_MATCH) { 240b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho return USTRINGTRIE_NO_MATCH; 241b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho } 242b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho // Fetch the next input unit, if there is one. 243b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho if(sLength<0) { 244b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho if((uchar=*s++)==0) { 245b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho return result; 246b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho } 247b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho } else { 248b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho if(sLength==0) { 249b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho return result; 250b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho } 251b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho uchar=*s++; 252b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho --sLength; 253b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho } 254b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho if(result==USTRINGTRIE_FINAL_VALUE) { 255b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho // No further matching units. 256b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho stop(); 257b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho return USTRINGTRIE_NO_MATCH; 258b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho } 259b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho pos=pos_; // branchNext() advanced pos and wrote it to pos_ . 260b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho node=*pos++; 261b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho } else if(node<kMinValueLead) { 262b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho // Match length+1 units. 263b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho length=node-kMinLinearMatch; // Actual match length minus 1. 264b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho if(uchar!=*pos) { 265b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho stop(); 266b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho return USTRINGTRIE_NO_MATCH; 267b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho } 268b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho ++pos; 269b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho --length; 270b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho break; 271b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho } else if(node&kValueIsFinal) { 272b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho // No further matching units. 273b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho stop(); 274b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho return USTRINGTRIE_NO_MATCH; 275b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho } else { 276b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho // Skip intermediate value. 277b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho pos=skipNodeValue(pos, node); 278b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho node&=kNodeTypeMask; 279b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho } 280b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho } 281b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho } 282b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho} 283b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho 284b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2clairehoconst UChar * 285b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2clairehoUCharsTrie::findUniqueValueFromBranch(const UChar *pos, int32_t length, 286b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho UBool haveUniqueValue, int32_t &uniqueValue) { 287b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho while(length>kMaxBranchLinearSubNodeLength) { 288b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho ++pos; // ignore the comparison unit 289b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho if(NULL==findUniqueValueFromBranch(jumpByDelta(pos), length>>1, haveUniqueValue, uniqueValue)) { 290b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho return NULL; 291b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho } 292b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho length=length-(length>>1); 293b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho pos=skipDelta(pos); 294b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho } 295b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho do { 296b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho ++pos; // ignore a comparison unit 297b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho // handle its value 298b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho int32_t node=*pos++; 299b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho UBool isFinal=(UBool)(node>>15); 300b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho node&=0x7fff; 301b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho int32_t value=readValue(pos, node); 302b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho pos=skipValue(pos, node); 303b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho if(isFinal) { 304b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho if(haveUniqueValue) { 305b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho if(value!=uniqueValue) { 306b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho return NULL; 307b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho } 308b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho } else { 309b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho uniqueValue=value; 310b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho haveUniqueValue=TRUE; 311b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho } 312b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho } else { 313b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho if(!findUniqueValue(pos+value, haveUniqueValue, uniqueValue)) { 314b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho return NULL; 315b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho } 316b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho haveUniqueValue=TRUE; 317b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho } 318b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho } while(--length>1); 319b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho return pos+1; // ignore the last comparison unit 320b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho} 321b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho 322b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2clairehoUBool 323b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2clairehoUCharsTrie::findUniqueValue(const UChar *pos, UBool haveUniqueValue, int32_t &uniqueValue) { 324b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho int32_t node=*pos++; 325b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho for(;;) { 326b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho if(node<kMinLinearMatch) { 327b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho if(node==0) { 328b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho node=*pos++; 329b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho } 330b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho pos=findUniqueValueFromBranch(pos, node+1, haveUniqueValue, uniqueValue); 331b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho if(pos==NULL) { 332b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho return FALSE; 333b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho } 334b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho haveUniqueValue=TRUE; 335b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho node=*pos++; 336b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho } else if(node<kMinValueLead) { 337b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho // linear-match node 338b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho pos+=node-kMinLinearMatch+1; // Ignore the match units. 339b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho node=*pos++; 340b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho } else { 341b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho UBool isFinal=(UBool)(node>>15); 342b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho int32_t value; 343b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho if(isFinal) { 344b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho value=readValue(pos, node&0x7fff); 345b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho } else { 346b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho value=readNodeValue(pos, node); 347b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho } 348b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho if(haveUniqueValue) { 349b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho if(value!=uniqueValue) { 350b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho return FALSE; 351b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho } 352b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho } else { 353b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho uniqueValue=value; 354b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho haveUniqueValue=TRUE; 355b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho } 356b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho if(isFinal) { 357b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho return TRUE; 358b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho } 359b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho pos=skipNodeValue(pos, node); 360b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho node&=kNodeTypeMask; 361b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho } 362b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho } 363b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho} 364b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho 365b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2clairehoint32_t 366b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2clairehoUCharsTrie::getNextUChars(Appendable &out) const { 367b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho const UChar *pos=pos_; 368b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho if(pos==NULL) { 369b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho return 0; 370b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho } 371b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho if(remainingMatchLength_>=0) { 372b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho out.appendCodeUnit(*pos); // Next unit of a pending linear-match node. 373b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho return 1; 374b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho } 375b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho int32_t node=*pos++; 376b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho if(node>=kMinValueLead) { 377b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho if(node&kValueIsFinal) { 378b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho return 0; 379b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho } else { 380b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho pos=skipNodeValue(pos, node); 381b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho node&=kNodeTypeMask; 382b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho } 383b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho } 384b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho if(node<kMinLinearMatch) { 385b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho if(node==0) { 386b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho node=*pos++; 387b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho } 388b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho out.reserveAppendCapacity(++node); 389b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho getNextBranchUChars(pos, node, out); 390b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho return node; 391b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho } else { 392b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho // First unit of the linear-match node. 393b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho out.appendCodeUnit(*pos); 394b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho return 1; 395b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho } 396b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho} 397b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho 398b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2clairehovoid 399b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2clairehoUCharsTrie::getNextBranchUChars(const UChar *pos, int32_t length, Appendable &out) { 400b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho while(length>kMaxBranchLinearSubNodeLength) { 401b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho ++pos; // ignore the comparison unit 402b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho getNextBranchUChars(jumpByDelta(pos), length>>1, out); 403b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho length=length-(length>>1); 404b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho pos=skipDelta(pos); 405b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho } 406b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho do { 407b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho out.appendCodeUnit(*pos++); 408b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho pos=skipValue(pos); 409b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho } while(--length>1); 410b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho out.appendCodeUnit(*pos); 411b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho} 412b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2claireho 413b26ce3a7367e4ed2ee7ddddcdc3f3d3377a455c2clairehoU_NAMESPACE_END 414