164339d36f8bd4db5025fe2988eda22b491a9219cFredrik Roubert// Copyright (C) 2016 and later: Unicode, Inc. and others. 264339d36f8bd4db5025fe2988eda22b491a9219cFredrik Roubert// License & terms of use: http://www.unicode.org/copyright.html 3fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius/* 4fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius******************************************************************************* 58de051c3d18a56cc126f0f44e368495a52f9148cFredrik Roubert* Copyright (C) 2012-2016, International Business Machines 6fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius* Corporation and others. All Rights Reserved. 7fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius******************************************************************************* 8fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius* uitercollationiterator.h 9fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius* 10fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius* created on: 2012sep23 (from utf16collationiterator.h) 11fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius* created by: Markus W. Scherer 12fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius*/ 13fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius 14fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius#ifndef __UITERCOLLATIONITERATOR_H__ 15fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius#define __UITERCOLLATIONITERATOR_H__ 16fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius 17fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius#include "unicode/utypes.h" 18fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius 19fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius#if !UCONFIG_NO_COLLATION 20fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius 21fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius#include "unicode/uiter.h" 22fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius#include "cmemory.h" 23fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius#include "collation.h" 24fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius#include "collationdata.h" 258de051c3d18a56cc126f0f44e368495a52f9148cFredrik Roubert#include "collationiterator.h" 26fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius#include "normalizer2impl.h" 27fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius 28fceb39872958b9fa2505e63f8b8699a9e0f882f4ccorneliusU_NAMESPACE_BEGIN 29fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius 30fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius/** 31fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius * UCharIterator-based collation element and character iterator. 32fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius * Handles normalized text inline, with length or NUL-terminated. 33fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius * Unnormalized text is handled by a subclass. 34fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius */ 35fceb39872958b9fa2505e63f8b8699a9e0f882f4ccorneliusclass U_I18N_API UIterCollationIterator : public CollationIterator { 36fceb39872958b9fa2505e63f8b8699a9e0f882f4ccorneliuspublic: 37fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius UIterCollationIterator(const CollationData *d, UBool numeric, UCharIterator &ui) 38fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius : CollationIterator(d, numeric), iter(ui) {} 39fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius 40fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius virtual ~UIterCollationIterator(); 41fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius 42fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius virtual void resetToOffset(int32_t newOffset); 43fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius 44fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius virtual int32_t getOffset() const; 45fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius 46fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius virtual UChar32 nextCodePoint(UErrorCode &errorCode); 47fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius 48fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius virtual UChar32 previousCodePoint(UErrorCode &errorCode); 49fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius 50fceb39872958b9fa2505e63f8b8699a9e0f882f4ccorneliusprotected: 51fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius virtual uint32_t handleNextCE32(UChar32 &c, UErrorCode &errorCode); 52fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius 53fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius virtual UChar handleGetTrailSurrogate(); 54fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius 55fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius virtual void forwardNumCodePoints(int32_t num, UErrorCode &errorCode); 56fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius 57fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius virtual void backwardNumCodePoints(int32_t num, UErrorCode &errorCode); 58fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius 59fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius UCharIterator &iter; 60fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius}; 61fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius 62fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius/** 63fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius * Incrementally checks the input text for FCD and normalizes where necessary. 64fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius */ 65fceb39872958b9fa2505e63f8b8699a9e0f882f4ccorneliusclass U_I18N_API FCDUIterCollationIterator : public UIterCollationIterator { 66fceb39872958b9fa2505e63f8b8699a9e0f882f4ccorneliuspublic: 67fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius FCDUIterCollationIterator(const CollationData *data, UBool numeric, UCharIterator &ui, int32_t startIndex) 68fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius : UIterCollationIterator(data, numeric, ui), 69fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius state(ITER_CHECK_FWD), start(startIndex), 70fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius nfcImpl(data->nfcImpl) {} 71fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius 72fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius virtual ~FCDUIterCollationIterator(); 73fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius 74fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius virtual void resetToOffset(int32_t newOffset); 75fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius 76fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius virtual int32_t getOffset() const; 77fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius 78fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius virtual UChar32 nextCodePoint(UErrorCode &errorCode); 79fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius 80fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius virtual UChar32 previousCodePoint(UErrorCode &errorCode); 81fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius 82fceb39872958b9fa2505e63f8b8699a9e0f882f4ccorneliusprotected: 83fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius virtual uint32_t handleNextCE32(UChar32 &c, UErrorCode &errorCode); 84fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius 85fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius virtual UChar handleGetTrailSurrogate(); 86fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius 87fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius virtual void forwardNumCodePoints(int32_t num, UErrorCode &errorCode); 88fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius 89fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius virtual void backwardNumCodePoints(int32_t num, UErrorCode &errorCode); 90fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius 91fceb39872958b9fa2505e63f8b8699a9e0f882f4ccorneliusprivate: 92fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius /** 93fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius * Switches to forward checking if possible. 94fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius */ 95fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius void switchToForward(); 96fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius 97fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius /** 98fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius * Extends the FCD text segment forward or normalizes around pos. 99fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius * @return TRUE if success 100fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius */ 101fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius UBool nextSegment(UErrorCode &errorCode); 102fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius 103fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius /** 104fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius * Switches to backward checking. 105fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius */ 106fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius void switchToBackward(); 107fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius 108fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius /** 109fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius * Extends the FCD text segment backward or normalizes around pos. 110fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius * @return TRUE if success 111fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius */ 112fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius UBool previousSegment(UErrorCode &errorCode); 113fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius 114fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius UBool normalize(const UnicodeString &s, UErrorCode &errorCode); 115fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius 116fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius enum State { 117fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius /** 118fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius * The input text [start..(iter index)[ passes the FCD check. 119fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius * Moving forward checks incrementally. 120fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius * pos & limit are undefined. 121fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius */ 122fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius ITER_CHECK_FWD, 123fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius /** 124fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius * The input text [(iter index)..limit[ passes the FCD check. 125fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius * Moving backward checks incrementally. 126fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius * start & pos are undefined. 127fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius */ 128fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius ITER_CHECK_BWD, 129fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius /** 130fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius * The input text [start..limit[ passes the FCD check. 131fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius * pos tracks the current text index. 132fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius */ 133fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius ITER_IN_FCD_SEGMENT, 134fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius /** 135fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius * The input text [start..limit[ failed the FCD check and was normalized. 136fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius * pos tracks the current index in the normalized string. 137fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius * The text iterator is at the limit index. 138fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius */ 139fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius IN_NORM_ITER_AT_LIMIT, 140fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius /** 141fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius * The input text [start..limit[ failed the FCD check and was normalized. 142fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius * pos tracks the current index in the normalized string. 143fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius * The text iterator is at the start index. 144fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius */ 145fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius IN_NORM_ITER_AT_START 146fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius }; 147fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius 148fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius State state; 149fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius 150fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius int32_t start; 151fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius int32_t pos; 152fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius int32_t limit; 153fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius 154fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius const Normalizer2Impl &nfcImpl; 155fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius UnicodeString normalized; 156fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius}; 157fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius 158fceb39872958b9fa2505e63f8b8699a9e0f882f4ccorneliusU_NAMESPACE_END 159fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius 160fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius#endif // !UCONFIG_NO_COLLATION 161fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius#endif // __UITERCOLLATIONITERATOR_H__ 162