1fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius/* 2fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius******************************************************************************* 3fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius* Copyright (C) 2010-2014, International Business Machines 4fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius* Corporation and others. All Rights Reserved. 5fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius******************************************************************************* 6fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius* collationiterator.h 7fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius* 8fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius* created on: 2010oct27 9fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius* created by: Markus W. Scherer 10fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius*/ 11fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius 12fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius#ifndef __COLLATIONITERATOR_H__ 13fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius#define __COLLATIONITERATOR_H__ 14fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius 15fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius#include "unicode/utypes.h" 16fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius 17fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius#if !UCONFIG_NO_COLLATION 18fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius 19fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius#include "cmemory.h" 20fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius#include "collation.h" 21fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius#include "collationdata.h" 22fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius 23fceb39872958b9fa2505e63f8b8699a9e0f882f4ccorneliusU_NAMESPACE_BEGIN 24fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius 25fceb39872958b9fa2505e63f8b8699a9e0f882f4ccorneliusclass SkippedState; 26fceb39872958b9fa2505e63f8b8699a9e0f882f4ccorneliusclass UCharsTrie; 27fceb39872958b9fa2505e63f8b8699a9e0f882f4ccorneliusclass UVector32; 28fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius 29fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius/** 30fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius * Collation element iterator and abstract character iterator. 31fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius * 32fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius * When a method returns a code point value, it must be in 0..10FFFF, 33fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius * except it can be negative as a sentinel value. 34fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius */ 35fceb39872958b9fa2505e63f8b8699a9e0f882f4ccorneliusclass U_I18N_API CollationIterator : public UObject { 36fceb39872958b9fa2505e63f8b8699a9e0f882f4ccorneliusprivate: 37fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius class CEBuffer { 38fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius private: 39fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius /** Large enough for CEs of most short strings. */ 40fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius static const int32_t INITIAL_CAPACITY = 40; 41fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius public: 42fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius CEBuffer() : length(0) {} 43fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius ~CEBuffer(); 44fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius 45fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius inline void append(int64_t ce, UErrorCode &errorCode) { 46fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius if(length < INITIAL_CAPACITY || ensureAppendCapacity(1, errorCode)) { 47fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius buffer[length++] = ce; 48fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius } 49fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius } 50fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius 51fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius inline void appendUnsafe(int64_t ce) { 52fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius buffer[length++] = ce; 53fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius } 54fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius 55fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius UBool ensureAppendCapacity(int32_t appCap, UErrorCode &errorCode); 56fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius 57fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius inline UBool incLength(UErrorCode &errorCode) { 58fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius // Use INITIAL_CAPACITY for a very simple fastpath. 59fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius // (Rather than buffer.getCapacity().) 60fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius if(length < INITIAL_CAPACITY || ensureAppendCapacity(1, errorCode)) { 61fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius ++length; 62fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius return TRUE; 63fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius } else { 64fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius return FALSE; 65fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius } 66fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius } 67fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius 68fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius inline int64_t set(int32_t i, int64_t ce) { 69fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius return buffer[i] = ce; 70fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius } 71fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius inline int64_t get(int32_t i) const { return buffer[i]; } 72fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius 73fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius const int64_t *getCEs() const { return buffer.getAlias(); } 74fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius 75fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius int32_t length; 76fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius 77fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius private: 78fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius CEBuffer(const CEBuffer &); 79fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius void operator=(const CEBuffer &); 80fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius 81fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius MaybeStackArray<int64_t, INITIAL_CAPACITY> buffer; 82fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius }; 83fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius 84fceb39872958b9fa2505e63f8b8699a9e0f882f4ccorneliuspublic: 85fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius CollationIterator(const CollationData *d, UBool numeric) 86fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius : trie(d->trie), 87fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius data(d), 88fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius cesIndex(0), 89fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius skipped(NULL), 90fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius numCpFwd(-1), 91fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius isNumeric(numeric) {} 92fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius 93fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius virtual ~CollationIterator(); 94fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius 95fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius virtual UBool operator==(const CollationIterator &other) const; 96fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius inline UBool operator!=(const CollationIterator &other) const { 97fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius return !operator==(other); 98fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius } 99fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius 100fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius /** 101fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius * Resets the iterator state and sets the position to the specified offset. 102fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius * Subclasses must implement, and must call the parent class method, 103fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius * or CollationIterator::reset(). 104fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius */ 105fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius virtual void resetToOffset(int32_t newOffset) = 0; 106fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius 107fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius virtual int32_t getOffset() const = 0; 108fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius 109fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius /** 110fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius * Returns the next collation element. 111fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius */ 112fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius inline int64_t nextCE(UErrorCode &errorCode) { 113fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius if(cesIndex < ceBuffer.length) { 114fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius // Return the next buffered CE. 115fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius return ceBuffer.get(cesIndex++); 116fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius } 117fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius // assert cesIndex == ceBuffer.length; 118fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius if(!ceBuffer.incLength(errorCode)) { 119fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius return Collation::NO_CE; 120fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius } 121fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius UChar32 c; 122fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius uint32_t ce32 = handleNextCE32(c, errorCode); 123fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius uint32_t t = ce32 & 0xff; 124fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius if(t < Collation::SPECIAL_CE32_LOW_BYTE) { // Forced-inline of isSpecialCE32(ce32). 125fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius // Normal CE from the main data. 126fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius // Forced-inline of ceFromSimpleCE32(ce32). 127fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius return ceBuffer.set(cesIndex++, 128fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius ((int64_t)(ce32 & 0xffff0000) << 32) | ((ce32 & 0xff00) << 16) | (t << 8)); 129fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius } 130fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius const CollationData *d; 131fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius // The compiler should be able to optimize the previous and the following 132fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius // comparisons of t with the same constant. 133fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius if(t == Collation::SPECIAL_CE32_LOW_BYTE) { 134fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius if(c < 0) { 135fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius return ceBuffer.set(cesIndex++, Collation::NO_CE); 136fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius } 137fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius d = data->base; 138fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius ce32 = d->getCE32(c); 139fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius t = ce32 & 0xff; 140fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius if(t < Collation::SPECIAL_CE32_LOW_BYTE) { 141fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius // Normal CE from the base data. 142fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius return ceBuffer.set(cesIndex++, 143fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius ((int64_t)(ce32 & 0xffff0000) << 32) | ((ce32 & 0xff00) << 16) | (t << 8)); 144fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius } 145fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius } else { 146fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius d = data; 147fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius } 148fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius if(t == Collation::LONG_PRIMARY_CE32_LOW_BYTE) { 149fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius // Forced-inline of ceFromLongPrimaryCE32(ce32). 150fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius return ceBuffer.set(cesIndex++, 151fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius ((int64_t)(ce32 - t) << 32) | Collation::COMMON_SEC_AND_TER_CE); 152fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius } 153fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius return nextCEFromCE32(d, c, ce32, errorCode); 154fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius } 155fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius 156fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius /** 157fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius * Fetches all CEs. 158fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius * @return getCEsLength() 159fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius */ 160fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius int32_t fetchCEs(UErrorCode &errorCode); 161fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius 162fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius /** 163fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius * Overwrites the current CE (the last one returned by nextCE()). 164fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius */ 165fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius void setCurrentCE(int64_t ce) { 166fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius // assert cesIndex > 0; 167fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius ceBuffer.set(cesIndex - 1, ce); 168fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius } 169fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius 170fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius /** 171fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius * Returns the previous collation element. 172fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius */ 173fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius int64_t previousCE(UVector32 &offsets, UErrorCode &errorCode); 174fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius 175fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius inline int32_t getCEsLength() const { 176fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius return ceBuffer.length; 177fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius } 178fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius 179fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius inline int64_t getCE(int32_t i) const { 180fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius return ceBuffer.get(i); 181fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius } 182fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius 183fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius const int64_t *getCEs() const { 184fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius return ceBuffer.getCEs(); 185fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius } 186fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius 187fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius void clearCEs() { 188fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius cesIndex = ceBuffer.length = 0; 189fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius } 190fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius 191fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius void clearCEsIfNoneRemaining() { 192fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius if(cesIndex == ceBuffer.length) { clearCEs(); } 193fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius } 194fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius 195fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius /** 196fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius * Returns the next code point (with post-increment). 197fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius * Public for identical-level comparison and for testing. 198fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius */ 199fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius virtual UChar32 nextCodePoint(UErrorCode &errorCode) = 0; 200fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius 201fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius /** 202fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius * Returns the previous code point (with pre-decrement). 203fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius * Public for identical-level comparison and for testing. 204fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius */ 205fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius virtual UChar32 previousCodePoint(UErrorCode &errorCode) = 0; 206fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius 207fceb39872958b9fa2505e63f8b8699a9e0f882f4ccorneliusprotected: 208fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius CollationIterator(const CollationIterator &other); 209fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius 210fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius void reset(); 211fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius 212fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius /** 213fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius * Returns the next code point and its local CE32 value. 214fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius * Returns Collation::FALLBACK_CE32 at the end of the text (c<0) 215fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius * or when c's CE32 value is to be looked up in the base data (fallback). 216fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius * 217fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius * The code point is used for fallbacks, context and implicit weights. 218fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius * It is ignored when the returned CE32 is not special (e.g., FFFD_CE32). 219fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius */ 220fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius virtual uint32_t handleNextCE32(UChar32 &c, UErrorCode &errorCode); 221fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius 222fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius /** 223fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius * Called when handleNextCE32() returns a LEAD_SURROGATE_TAG for a lead surrogate code unit. 224fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius * Returns the trail surrogate in that case and advances past it, 225fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius * if a trail surrogate follows the lead surrogate. 226fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius * Otherwise returns any other code unit and does not advance. 227fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius */ 228fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius virtual UChar handleGetTrailSurrogate(); 229fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius 230fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius /** 231fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius * Called when handleNextCE32() returns with c==0, to see whether it is a NUL terminator. 232fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius * (Not needed in Java.) 233fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius */ 234fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius virtual UBool foundNULTerminator(); 235fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius 236fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius /** 237fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius * @return FALSE if surrogate code points U+D800..U+DFFF 238fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius * map to their own implicit primary weights (for UTF-16), 239fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius * or TRUE if they map to CE(U+FFFD) (for UTF-8) 240fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius */ 241fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius virtual UBool forbidSurrogateCodePoints() const; 242fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius 243fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius virtual void forwardNumCodePoints(int32_t num, UErrorCode &errorCode) = 0; 244fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius 245fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius virtual void backwardNumCodePoints(int32_t num, UErrorCode &errorCode) = 0; 246fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius 247fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius /** 248fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius * Returns the CE32 from the data trie. 249fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius * Normally the same as data->getCE32(), but overridden in the builder. 250fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius * Call this only when the faster data->getCE32() cannot be used. 251fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius */ 252fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius virtual uint32_t getDataCE32(UChar32 c) const; 253fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius 254fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius virtual uint32_t getCE32FromBuilderData(uint32_t ce32, UErrorCode &errorCode); 255fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius 256fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius void appendCEsFromCE32(const CollationData *d, UChar32 c, uint32_t ce32, 257fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius UBool forward, UErrorCode &errorCode); 258fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius 259fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius // Main lookup trie of the data object. 260fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius const UTrie2 *trie; 261fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius const CollationData *data; 262fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius 263fceb39872958b9fa2505e63f8b8699a9e0f882f4ccorneliusprivate: 264fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius int64_t nextCEFromCE32(const CollationData *d, UChar32 c, uint32_t ce32, 265fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius UErrorCode &errorCode); 266fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius 267fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius uint32_t getCE32FromPrefix(const CollationData *d, uint32_t ce32, 268fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius UErrorCode &errorCode); 269fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius 270fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius UChar32 nextSkippedCodePoint(UErrorCode &errorCode); 271fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius 272fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius void backwardNumSkipped(int32_t n, UErrorCode &errorCode); 273fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius 274fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius uint32_t nextCE32FromContraction( 275fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius const CollationData *d, uint32_t contractionCE32, 276fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius const UChar *p, uint32_t ce32, UChar32 c, 277fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius UErrorCode &errorCode); 278fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius 279fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius uint32_t nextCE32FromDiscontiguousContraction( 280fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius const CollationData *d, UCharsTrie &suffixes, uint32_t ce32, 281fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius int32_t lookAhead, UChar32 c, 282fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius UErrorCode &errorCode); 283fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius 284fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius /** 285fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius * Returns the previous CE when data->isUnsafeBackward(c, isNumeric). 286fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius */ 287fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius int64_t previousCEUnsafe(UChar32 c, UVector32 &offsets, UErrorCode &errorCode); 288fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius 289fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius /** 290fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius * Turns a string of digits (bytes 0..9) 291fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius * into a sequence of CEs that will sort in numeric order. 292fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius * 293fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius * Starts from this ce32's digit value and consumes the following/preceding digits. 294fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius * The digits string must not be empty and must not have leading zeros. 295fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius */ 296fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius void appendNumericCEs(uint32_t ce32, UBool forward, UErrorCode &errorCode); 297fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius 298fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius /** 299fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius * Turns 1..254 digits into a sequence of CEs. 300fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius * Called by appendNumericCEs() for each segment of at most 254 digits. 301fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius */ 302fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius void appendNumericSegmentCEs(const char *digits, int32_t length, UErrorCode &errorCode); 303fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius 304fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius CEBuffer ceBuffer; 305fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius int32_t cesIndex; 306fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius 307fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius SkippedState *skipped; 308fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius 309fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius // Number of code points to read forward, or -1. 310fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius // Used as a forward iteration limit in previousCEUnsafe(). 311fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius int32_t numCpFwd; 312fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius // Numeric collation (CollationSettings::NUMERIC). 313fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius UBool isNumeric; 314fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius}; 315fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius 316fceb39872958b9fa2505e63f8b8699a9e0f882f4ccorneliusU_NAMESPACE_END 317fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius 318fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius#endif // !UCONFIG_NO_COLLATION 319fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius#endif // __COLLATIONITERATOR_H__ 320