1b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru/* 2b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru******************************************************************************* 3fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius* Copyright (C) 1996-2014, International Business Machines Corporation and 4fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius* others. All Rights Reserved. 5b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru******************************************************************************* 6b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru*/ 7b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 8b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru/* 9b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru* File coleitr.cpp 10b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru* 11b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru* Created by: Helena Shih 12b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru* 13b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru* Modification History: 14b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru* 15b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru* Date Name Description 16b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru* 17b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru* 6/23/97 helena Adding comments to make code more readable. 18b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru* 08/03/98 erm Synched with 1.2 version of CollationElementIterator.java 19b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru* 12/10/99 aliu Ported Thai collation support from Java. 20b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru* 01/25/01 swquek Modified to a C++ wrapper calling C APIs (ucoliter.h) 21fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius* 02/19/01 swquek Removed CollationElementIterator() since it is 22b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru* private constructor and no calls are made to it 23fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius* 2012-2014 markus Rewritten in C++ again. 24b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru*/ 25b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 26b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru#include "unicode/utypes.h" 27b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 28b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru#if !UCONFIG_NO_COLLATION 29b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 30b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru#include "unicode/coleitr.h" 31fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius#include "unicode/tblcoll.h" 32b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru#include "unicode/ustring.h" 33b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru#include "cmemory.h" 34fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius#include "collation.h" 35fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius#include "collationdata.h" 36fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius#include "collationiterator.h" 37fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius#include "collationsets.h" 38fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius#include "collationtailoring.h" 39fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius#include "uassert.h" 40fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius#include "uhash.h" 41fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius#include "utf16collationiterator.h" 42fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius#include "uvectr32.h" 43b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 44b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru/* Constants --------------------------------------------------------------- */ 45b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 46b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste QueruU_NAMESPACE_BEGIN 47b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 48b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste QueruUOBJECT_DEFINE_RTTI_IMPLEMENTATION(CollationElementIterator) 49b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 50b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru/* CollationElementIterator public constructor/destructor ------------------ */ 51b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 52b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste QueruCollationElementIterator::CollationElementIterator( 53b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru const CollationElementIterator& other) 54fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius : UObject(other), iter_(NULL), rbc_(NULL), otherHalf_(0), dir_(0), offsets_(NULL) { 55b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru *this = other; 56b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru} 57b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 58b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste QueruCollationElementIterator::~CollationElementIterator() 59b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru{ 60fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius delete iter_; 61fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius delete offsets_; 62b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru} 63b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 64b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru/* CollationElementIterator public methods --------------------------------- */ 65b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 66fceb39872958b9fa2505e63f8b8699a9e0f882f4ccorneliusnamespace { 67fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius 68fceb39872958b9fa2505e63f8b8699a9e0f882f4ccorneliusuint32_t getFirstHalf(uint32_t p, uint32_t lower32) { 69fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius return (p & 0xffff0000) | ((lower32 >> 16) & 0xff00) | ((lower32 >> 8) & 0xff); 70fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius} 71fceb39872958b9fa2505e63f8b8699a9e0f882f4ccorneliusuint32_t getSecondHalf(uint32_t p, uint32_t lower32) { 72fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius return (p << 16) | ((lower32 >> 8) & 0xff00) | (lower32 & 0x3f); 73fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius} 74fceb39872958b9fa2505e63f8b8699a9e0f882f4ccorneliusUBool ceNeedsTwoParts(int64_t ce) { 75fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius return (ce & INT64_C(0xffff00ff003f)) != 0; 76fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius} 77fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius 78fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius} // namespace 79fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius 80b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queruint32_t CollationElementIterator::getOffset() const 81b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru{ 82fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius if (dir_ < 0 && offsets_ != NULL && !offsets_->isEmpty()) { 83fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius // CollationIterator::previousCE() decrements the CEs length 84fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius // while it pops CEs from its internal buffer. 85fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius int32_t i = iter_->getCEsLength(); 86fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius if (otherHalf_ != 0) { 87fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius // Return the trailing CE offset while we are in the middle of a 64-bit CE. 88fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius ++i; 89fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius } 90fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius U_ASSERT(i < offsets_->size()); 91fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius return offsets_->elementAti(i); 92fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius } 93fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius return iter_->getOffset(); 94b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru} 95b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 96b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru/** 97b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru* Get the ordering priority of the next character in the string. 98b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru* @return the next character's ordering. Returns NULLORDER if an error has 99b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru* occured or if the end of string has been reached 100b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru*/ 101b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queruint32_t CollationElementIterator::next(UErrorCode& status) 102b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru{ 103fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius if (U_FAILURE(status)) { return NULLORDER; } 104fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius if (dir_ > 1) { 105fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius // Continue forward iteration. Test this first. 106fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius if (otherHalf_ != 0) { 107fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius uint32_t oh = otherHalf_; 108fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius otherHalf_ = 0; 109fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius return oh; 110fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius } 111fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius } else if (dir_ == 1) { 112fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius // next() after setOffset() 113fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius dir_ = 2; 114fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius } else if (dir_ == 0) { 115fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius // The iter_ is already reset to the start of the text. 116fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius dir_ = 2; 117fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius } else /* dir_ < 0 */ { 118fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius // illegal change of direction 119fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius status = U_INVALID_STATE_ERROR; 120fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius return NULLORDER; 121fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius } 122fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius // No need to keep all CEs in the buffer when we iterate. 123fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius iter_->clearCEsIfNoneRemaining(); 124fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius int64_t ce = iter_->nextCE(status); 125fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius if (ce == Collation::NO_CE) { return NULLORDER; } 126fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius // Turn the 64-bit CE into two old-style 32-bit CEs, without quaternary bits. 127fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius uint32_t p = (uint32_t)(ce >> 32); 128fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius uint32_t lower32 = (uint32_t)ce; 129fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius uint32_t firstHalf = getFirstHalf(p, lower32); 130fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius uint32_t secondHalf = getSecondHalf(p, lower32); 131fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius if (secondHalf != 0) { 132fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius otherHalf_ = secondHalf | 0xc0; // continuation CE 133fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius } 134fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius return firstHalf; 135b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru} 136b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 137b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste QueruUBool CollationElementIterator::operator!=( 138b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru const CollationElementIterator& other) const 139b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru{ 140b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru return !(*this == other); 141b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru} 142b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 143b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste QueruUBool CollationElementIterator::operator==( 144b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru const CollationElementIterator& that) const 145b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru{ 146fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius if (this == &that) { 147b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru return TRUE; 148b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 149b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 150fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius return 151fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius (rbc_ == that.rbc_ || *rbc_ == *that.rbc_) && 152fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius otherHalf_ == that.otherHalf_ && 153fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius normalizeDir() == that.normalizeDir() && 154fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius string_ == that.string_ && 155fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius *iter_ == *that.iter_; 156b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru} 157b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 158b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru/** 159b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru* Get the ordering priority of the previous collation element in the string. 160b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru* @param status the error code status. 161b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru* @return the previous element's ordering. Returns NULLORDER if an error has 162b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru* occured or if the start of string has been reached. 163b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru*/ 164b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queruint32_t CollationElementIterator::previous(UErrorCode& status) 165b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru{ 166fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius if (U_FAILURE(status)) { return NULLORDER; } 167fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius if (dir_ < 0) { 168fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius // Continue backwards iteration. Test this first. 169fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius if (otherHalf_ != 0) { 170fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius uint32_t oh = otherHalf_; 171fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius otherHalf_ = 0; 172fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius return oh; 173fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius } 174fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius } else if (dir_ == 0) { 175fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius iter_->resetToOffset(string_.length()); 176fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius dir_ = -1; 177fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius } else if (dir_ == 1) { 178fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius // previous() after setOffset() 179fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius dir_ = -1; 180fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius } else /* dir_ > 1 */ { 181fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius // illegal change of direction 182fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius status = U_INVALID_STATE_ERROR; 183fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius return NULLORDER; 184fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius } 185fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius if (offsets_ == NULL) { 186fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius offsets_ = new UVector32(status); 187fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius if (offsets_ == NULL) { 188fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius status = U_MEMORY_ALLOCATION_ERROR; 189fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius return NULLORDER; 190fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius } 191fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius } 192fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius // If we already have expansion CEs, then we also have offsets. 193fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius // Otherwise remember the trailing offset in case we need to 194fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius // write offsets for an artificial expansion. 195fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius int32_t limitOffset = iter_->getCEsLength() == 0 ? iter_->getOffset() : 0; 196fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius int64_t ce = iter_->previousCE(*offsets_, status); 197fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius if (ce == Collation::NO_CE) { return NULLORDER; } 198fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius // Turn the 64-bit CE into two old-style 32-bit CEs, without quaternary bits. 199fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius uint32_t p = (uint32_t)(ce >> 32); 200fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius uint32_t lower32 = (uint32_t)ce; 201fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius uint32_t firstHalf = getFirstHalf(p, lower32); 202fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius uint32_t secondHalf = getSecondHalf(p, lower32); 203fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius if (secondHalf != 0) { 204fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius if (offsets_->isEmpty()) { 205fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius // When we convert a single 64-bit CE into two 32-bit CEs, 206fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius // we need to make this artificial expansion behave like a normal expansion. 207fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius // See CollationIterator::previousCE(). 208fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius offsets_->addElement(iter_->getOffset(), status); 209fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius offsets_->addElement(limitOffset, status); 210fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius } 211fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius otherHalf_ = firstHalf; 212fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius return secondHalf | 0xc0; // continuation CE 213fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius } 214fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius return firstHalf; 215b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru} 216b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 217b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru/** 218b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru* Resets the cursor to the beginning of the string. 219b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru*/ 220b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queruvoid CollationElementIterator::reset() 221b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru{ 222fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius iter_ ->resetToOffset(0); 223fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius otherHalf_ = 0; 224fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius dir_ = 0; 225b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru} 226b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 227b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queruvoid CollationElementIterator::setOffset(int32_t newOffset, 228b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru UErrorCode& status) 229b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru{ 230fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius if (U_FAILURE(status)) { return; } 231fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius if (0 < newOffset && newOffset < string_.length()) { 232fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius int32_t offset = newOffset; 233fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius do { 234fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius UChar c = string_.charAt(offset); 235fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius if (!rbc_->isUnsafe(c) || 236fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius (U16_IS_LEAD(c) && !rbc_->isUnsafe(string_.char32At(offset)))) { 237fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius break; 238fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius } 239fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius // Back up to before this unsafe character. 240fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius --offset; 241fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius } while (offset > 0); 242fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius if (offset < newOffset) { 243fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius // We might have backed up more than necessary. 244fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius // For example, contractions "ch" and "cu" make both 'h' and 'u' unsafe, 245fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius // but for text "chu" setOffset(2) should remain at 2 246fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius // although we initially back up to offset 0. 247fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius // Find the last safe offset no greater than newOffset by iterating forward. 248fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius int32_t lastSafeOffset = offset; 249fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius do { 250fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius iter_->resetToOffset(lastSafeOffset); 251fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius do { 252fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius iter_->nextCE(status); 253fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius if (U_FAILURE(status)) { return; } 254fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius } while ((offset = iter_->getOffset()) == lastSafeOffset); 255fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius if (offset <= newOffset) { 256fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius lastSafeOffset = offset; 257fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius } 258fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius } while (offset < newOffset); 259fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius newOffset = lastSafeOffset; 260fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius } 261fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius } 262fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius iter_->resetToOffset(newOffset); 263fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius otherHalf_ = 0; 264fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius dir_ = 1; 265b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru} 266b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 267b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru/** 268b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru* Sets the source to the new source string. 269b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru*/ 270b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queruvoid CollationElementIterator::setText(const UnicodeString& source, 271b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru UErrorCode& status) 272b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru{ 273b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru if (U_FAILURE(status)) { 274b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru return; 275b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 276b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 277fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius string_ = source; 278fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius const UChar *s = string_.getBuffer(); 279fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius CollationIterator *newIter; 280fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius UBool numeric = rbc_->settings->isNumeric(); 281fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius if (rbc_->settings->dontCheckFCD()) { 282fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius newIter = new UTF16CollationIterator(rbc_->data, numeric, s, s, s + string_.length()); 283fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius } else { 284fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius newIter = new FCDUTF16CollationIterator(rbc_->data, numeric, s, s, s + string_.length()); 285b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 286fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius if (newIter == NULL) { 287fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius status = U_MEMORY_ALLOCATION_ERROR; 288fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius return; 289b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 290fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius delete iter_; 291fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius iter_ = newIter; 292fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius otherHalf_ = 0; 293fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius dir_ = 0; 294b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru} 295b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 296b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru// Sets the source to the new character iterator. 297b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queruvoid CollationElementIterator::setText(CharacterIterator& source, 298b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru UErrorCode& status) 299b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru{ 300b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru if (U_FAILURE(status)) 301b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru return; 302b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 303fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius source.getText(string_); 304fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius setText(string_, status); 305b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru} 306b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 307b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queruint32_t CollationElementIterator::strengthOrder(int32_t order) const 308b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru{ 309fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius UColAttributeValue s = (UColAttributeValue)rbc_->settings->getStrength(); 310b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru // Mask off the unwanted differences. 311b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru if (s == UCOL_PRIMARY) { 312fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius order &= 0xffff0000; 313b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 314b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru else if (s == UCOL_SECONDARY) { 315fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius order &= 0xffffff00; 316b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 317b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 318b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru return order; 319b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru} 320b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 321b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru/* CollationElementIterator private constructors/destructors --------------- */ 322b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 323b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru/** 324b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru* This is the "real" constructor for this class; it constructs an iterator 325b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru* over the source text using the specified collator 326b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru*/ 327b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste QueruCollationElementIterator::CollationElementIterator( 328fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius const UnicodeString &source, 329fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius const RuleBasedCollator *coll, 330fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius UErrorCode &status) 331fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius : iter_(NULL), rbc_(coll), otherHalf_(0), dir_(0), offsets_(NULL) { 332fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius setText(source, status); 333b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru} 334b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 335b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru/** 336b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru* This is the "real" constructor for this class; it constructs an iterator over 337b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru* the source text using the specified collator 338b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru*/ 339b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste QueruCollationElementIterator::CollationElementIterator( 340fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius const CharacterIterator &source, 341fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius const RuleBasedCollator *coll, 342fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius UErrorCode &status) 343fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius : iter_(NULL), rbc_(coll), otherHalf_(0), dir_(0), offsets_(NULL) { 344fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius // We only call source.getText() which should be const anyway. 345fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius setText(const_cast<CharacterIterator &>(source), status); 346b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru} 347b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 348fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius/* CollationElementIterator private methods -------------------------------- */ 349b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 350b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queruconst CollationElementIterator& CollationElementIterator::operator=( 351b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru const CollationElementIterator& other) 352b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru{ 353fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius if (this == &other) { 354fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius return *this; 355fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius } 356fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius 357fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius CollationIterator *newIter; 358fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius const FCDUTF16CollationIterator *otherFCDIter = 359fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius dynamic_cast<const FCDUTF16CollationIterator *>(other.iter_); 360fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius if(otherFCDIter != NULL) { 361fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius newIter = new FCDUTF16CollationIterator(*otherFCDIter, string_.getBuffer()); 362fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius } else { 363fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius const UTF16CollationIterator *otherIter = 364fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius dynamic_cast<const UTF16CollationIterator *>(other.iter_); 365fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius if(otherIter != NULL) { 366fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius newIter = new UTF16CollationIterator(*otherIter, string_.getBuffer()); 367fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius } else { 368fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius newIter = NULL; 369b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 370fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius } 371fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius if(newIter != NULL) { 372fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius delete iter_; 373fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius iter_ = newIter; 374fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius rbc_ = other.rbc_; 375fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius otherHalf_ = other.otherHalf_; 376fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius dir_ = other.dir_; 377fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius 378fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius string_ = other.string_; 379fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius } 380fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius if(other.dir_ < 0 && other.offsets_ != NULL && !other.offsets_->isEmpty()) { 381fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius UErrorCode errorCode = U_ZERO_ERROR; 382fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius if(offsets_ == NULL) { 383fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius offsets_ = new UVector32(other.offsets_->size(), errorCode); 384b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 385fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius if(offsets_ != NULL) { 386fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius offsets_->assign(*other.offsets_, errorCode); 387b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 388fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius } 389fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius return *this; 390fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius} 391fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius 392fceb39872958b9fa2505e63f8b8699a9e0f882f4ccorneliusnamespace { 393b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 394fceb39872958b9fa2505e63f8b8699a9e0f882f4ccorneliusclass MaxExpSink : public ContractionsAndExpansions::CESink { 395fceb39872958b9fa2505e63f8b8699a9e0f882f4ccorneliuspublic: 396fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius MaxExpSink(UHashtable *h, UErrorCode &ec) : maxExpansions(h), errorCode(ec) {} 397fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius virtual ~MaxExpSink(); 398fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius virtual void handleCE(int64_t /*ce*/) {} 399fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius virtual void handleExpansion(const int64_t ces[], int32_t length) { 400fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius if (length <= 1) { 401fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius // We do not need to add single CEs into the map. 402fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius return; 403b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 404fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius int32_t count = 0; // number of CE "halves" 405fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius for (int32_t i = 0; i < length; ++i) { 406fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius count += ceNeedsTwoParts(ces[i]) ? 2 : 1; 407b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 408fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius // last "half" of the last CE 409fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius int64_t ce = ces[length - 1]; 410fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius uint32_t p = (uint32_t)(ce >> 32); 411fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius uint32_t lower32 = (uint32_t)ce; 412fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius uint32_t lastHalf = getSecondHalf(p, lower32); 413fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius if (lastHalf == 0) { 414fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius lastHalf = getFirstHalf(p, lower32); 415fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius U_ASSERT(lastHalf != 0); 416c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru } else { 417fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius lastHalf |= 0xc0; // old-style continuation CE 418b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 419fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius if (count > uhash_igeti(maxExpansions, (int32_t)lastHalf)) { 420fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius uhash_iputi(maxExpansions, (int32_t)lastHalf, count, &errorCode); 421b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 422b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru } 423b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 424fceb39872958b9fa2505e63f8b8699a9e0f882f4ccorneliusprivate: 425fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius UHashtable *maxExpansions; 426fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius UErrorCode &errorCode; 427fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius}; 428fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius 429fceb39872958b9fa2505e63f8b8699a9e0f882f4ccorneliusMaxExpSink::~MaxExpSink() {} 430fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius 431fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius} // namespace 432fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius 433fceb39872958b9fa2505e63f8b8699a9e0f882f4ccorneliusUHashtable * 434fceb39872958b9fa2505e63f8b8699a9e0f882f4ccorneliusCollationElementIterator::computeMaxExpansions(const CollationData *data, UErrorCode &errorCode) { 435fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius if (U_FAILURE(errorCode)) { return NULL; } 436fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius UHashtable *maxExpansions = uhash_open(uhash_hashLong, uhash_compareLong, 437fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius uhash_compareLong, &errorCode); 438fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius if (U_FAILURE(errorCode)) { return NULL; } 439fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius MaxExpSink sink(maxExpansions, errorCode); 440fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius ContractionsAndExpansions(NULL, NULL, &sink, TRUE).forData(data, errorCode); 441fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius if (U_FAILURE(errorCode)) { 442fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius uhash_close(maxExpansions); 443fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius return NULL; 444fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius } 445fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius return maxExpansions; 446fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius} 447fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius 448fceb39872958b9fa2505e63f8b8699a9e0f882f4ccorneliusint32_t 449fceb39872958b9fa2505e63f8b8699a9e0f882f4ccorneliusCollationElementIterator::getMaxExpansion(int32_t order) const { 450fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius return getMaxExpansion(rbc_->tailoring->maxExpansions, order); 451fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius} 452fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius 453fceb39872958b9fa2505e63f8b8699a9e0f882f4ccorneliusint32_t 454fceb39872958b9fa2505e63f8b8699a9e0f882f4ccorneliusCollationElementIterator::getMaxExpansion(const UHashtable *maxExpansions, int32_t order) { 455fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius if (order == 0) { return 1; } 456fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius int32_t max; 457fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius if(maxExpansions != NULL && (max = uhash_igeti(maxExpansions, order)) != 0) { 458fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius return max; 459fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius } 460fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius if ((order & 0xc0) == 0xc0) { 461fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius // old-style continuation CE 462fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius return 2; 463fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius } else { 464fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius return 1; 465fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius } 466b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru} 467b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 468b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste QueruU_NAMESPACE_END 469b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru 470b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru#endif /* #if !UCONFIG_NO_COLLATION */ 471