/*
*******************************************************************************
* Copyright (C) 2010-2014, International Business Machines
* Corporation and others.  All Rights Reserved.
*******************************************************************************
* utf16collationiterator.h
*
* created on: 2010oct27
* created by: Markus W. Scherer
*/

#ifndef __UTF16COLLATIONITERATOR_H__
#define __UTF16COLLATIONITERATOR_H__

#include "unicode/utypes.h"

#if !UCONFIG_NO_COLLATION

#include "cmemory.h"
#include "collation.h"
#include "collationdata.h"
#include "collationiterator.h"
#include "normalizer2impl.h"

U_NAMESPACE_BEGIN

27fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius/** 28fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius * UTF-16 collation element and character iterator. 29fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius * Handles normalized UTF-16 text inline, with length or NUL-terminated. 30fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius * Unnormalized text is handled by a subclass. 31fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius */ 32fceb39872958b9fa2505e63f8b8699a9e0f882f4ccorneliusclass U_I18N_API UTF16CollationIterator : public CollationIterator { 33fceb39872958b9fa2505e63f8b8699a9e0f882f4ccorneliuspublic: 34fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius UTF16CollationIterator(const CollationData *d, UBool numeric, 35fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius const UChar *s, const UChar *p, const UChar *lim) 36fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius : CollationIterator(d, numeric), 37fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius start(s), pos(p), limit(lim) {} 38fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius 39fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius UTF16CollationIterator(const UTF16CollationIterator &other, const UChar *newText); 40fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius 41fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius virtual ~UTF16CollationIterator(); 42fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius 43fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius virtual UBool operator==(const CollationIterator &other) const; 44fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius 45fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius virtual void resetToOffset(int32_t newOffset); 46fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius 47fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius virtual int32_t getOffset() const; 48fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius 49fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius void setText(const UChar *s, const UChar *lim) { 
50fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius reset(); 51fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius start = pos = s; 52fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius limit = lim; 53fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius } 54fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius 55fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius virtual UChar32 nextCodePoint(UErrorCode &errorCode); 56fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius 57fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius virtual UChar32 previousCodePoint(UErrorCode &errorCode); 58fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius 59fceb39872958b9fa2505e63f8b8699a9e0f882f4ccorneliusprotected: 60fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius // Copy constructor only for subclasses which set the pointers. 61fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius UTF16CollationIterator(const UTF16CollationIterator &other) 62fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius : CollationIterator(other), 63fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius start(NULL), pos(NULL), limit(NULL) {} 64fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius 65fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius virtual uint32_t handleNextCE32(UChar32 &c, UErrorCode &errorCode); 66fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius 67fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius virtual UChar handleGetTrailSurrogate(); 68fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius 69fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius virtual UBool foundNULTerminator(); 70fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius 71fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius virtual void forwardNumCodePoints(int32_t num, UErrorCode &errorCode); 72fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius 73fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius virtual void backwardNumCodePoints(int32_t num, UErrorCode &errorCode); 74fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius 
75fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius // UTF-16 string pointers. 76fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius // limit can be NULL for NUL-terminated strings. 77fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius const UChar *start, *pos, *limit; 78fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius}; 79fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius 80fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius/** 81fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius * Incrementally checks the input text for FCD and normalizes where necessary. 82fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius */ 83fceb39872958b9fa2505e63f8b8699a9e0f882f4ccorneliusclass U_I18N_API FCDUTF16CollationIterator : public UTF16CollationIterator { 84fceb39872958b9fa2505e63f8b8699a9e0f882f4ccorneliuspublic: 85fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius FCDUTF16CollationIterator(const CollationData *data, UBool numeric, 86fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius const UChar *s, const UChar *p, const UChar *lim) 87fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius : UTF16CollationIterator(data, numeric, s, p, lim), 88fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius rawStart(s), segmentStart(p), segmentLimit(NULL), rawLimit(lim), 89fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius nfcImpl(data->nfcImpl), 90fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius checkDir(1) {} 91fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius 92fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius FCDUTF16CollationIterator(const FCDUTF16CollationIterator &other, const UChar *newText); 93fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius 94fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius virtual ~FCDUTF16CollationIterator(); 95fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius 96fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius virtual UBool operator==(const CollationIterator &other) const; 97fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius 
98fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius virtual void resetToOffset(int32_t newOffset); 99fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius 100fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius virtual int32_t getOffset() const; 101fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius 102fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius virtual UChar32 nextCodePoint(UErrorCode &errorCode); 103fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius 104fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius virtual UChar32 previousCodePoint(UErrorCode &errorCode); 105fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius 106fceb39872958b9fa2505e63f8b8699a9e0f882f4ccorneliusprotected: 107fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius virtual uint32_t handleNextCE32(UChar32 &c, UErrorCode &errorCode); 108fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius 109fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius virtual UBool foundNULTerminator(); 110fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius 111fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius virtual void forwardNumCodePoints(int32_t num, UErrorCode &errorCode); 112fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius 113fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius virtual void backwardNumCodePoints(int32_t num, UErrorCode &errorCode); 114fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius 115fceb39872958b9fa2505e63f8b8699a9e0f882f4ccorneliusprivate: 116fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius /** 117fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius * Switches to forward checking if possible. 118fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius * To be called when checkDir < 0 || (checkDir == 0 && pos == limit). 119fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius * Returns with checkDir > 0 || (checkDir == 0 && pos != limit). 
120fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius */ 121fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius void switchToForward(); 122fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius 123fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius /** 124fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius * Extend the FCD text segment forward or normalize around pos. 125fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius * To be called when checkDir > 0 && pos != limit. 126fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius * @return TRUE if success, checkDir == 0 and pos != limit 127fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius */ 128fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius UBool nextSegment(UErrorCode &errorCode); 129fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius 130fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius /** 131fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius * Switches to backward checking. 132fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius * To be called when checkDir > 0 || (checkDir == 0 && pos == start). 133fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius * Returns with checkDir < 0 || (checkDir == 0 && pos != start). 134fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius */ 135fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius void switchToBackward(); 136fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius 137fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius /** 138fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius * Extend the FCD text segment backward or normalize around pos. 139fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius * To be called when checkDir < 0 && pos != start. 
140fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius * @return TRUE if success, checkDir == 0 and pos != start 141fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius */ 142fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius UBool previousSegment(UErrorCode &errorCode); 143fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius 144fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius UBool normalize(const UChar *from, const UChar *to, UErrorCode &errorCode); 145fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius 146fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius // Text pointers: The input text is [rawStart, rawLimit[ 147fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius // where rawLimit can be NULL for NUL-terminated text. 148fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius // 149fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius // checkDir > 0: 150fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius // 151fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius // The input text [segmentStart..pos[ passes the FCD check. 152fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius // Moving forward checks incrementally. 153fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius // segmentLimit is undefined. limit == rawLimit. 154fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius // 155fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius // checkDir < 0: 156fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius // The input text [pos..segmentLimit[ passes the FCD check. 157fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius // Moving backward checks incrementally. 158fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius // segmentStart is undefined, start == rawStart. 159fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius // 160fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius // checkDir == 0: 161fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius // 162fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius // The input text [segmentStart..segmentLimit[ is being processed. 
163fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius // These pointers are at FCD boundaries. 164fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius // Either this text segment already passes the FCD check 165fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius // and segmentStart==start<=pos<=limit==segmentLimit, 166fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius // or the current segment had to be normalized so that 167fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius // [segmentStart..segmentLimit[ turned into the normalized string, 168fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius // corresponding to normalized.getBuffer()==start<=pos<=limit==start+normalized.length(). 169fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius const UChar *rawStart; 170fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius const UChar *segmentStart; 171fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius const UChar *segmentLimit; 172fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius // rawLimit==NULL for a NUL-terminated string. 173fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius const UChar *rawLimit; 174fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius 175fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius const Normalizer2Impl &nfcImpl; 176fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius UnicodeString normalized; 177fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius // Direction of incremental FCD check. See comments before rawStart. 178fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius int8_t checkDir; 179fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius}; 180fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius 181fceb39872958b9fa2505e63f8b8699a9e0f882f4ccorneliusU_NAMESPACE_END 182fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius 183fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius#endif // !UCONFIG_NO_COLLATION 184fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius#endif // __UTF16COLLATIONITERATOR_H__ 185