1/* 2******************************************************************************* 3* Copyright (C) 2010-2014, International Business Machines 4* Corporation and others. All Rights Reserved. 5******************************************************************************* 6* utf16collationiterator.h 7* 8* created on: 2010oct27 9* created by: Markus W. Scherer 10*/ 11 12#ifndef __UTF16COLLATIONITERATOR_H__ 13#define __UTF16COLLATIONITERATOR_H__ 14 15#include "unicode/utypes.h" 16 17#if !UCONFIG_NO_COLLATION 18 19#include "cmemory.h" 20#include "collation.h" 21#include "collationdata.h" 22#include "collationiterator.h" 23#include "normalizer2impl.h" 24 25U_NAMESPACE_BEGIN 26 27/** 28 * UTF-16 collation element and character iterator. 29 * Handles normalized UTF-16 text inline, with length or NUL-terminated. 30 * Unnormalized text is handled by a subclass. 31 */ 32class U_I18N_API UTF16CollationIterator : public CollationIterator { 33public: 34 UTF16CollationIterator(const CollationData *d, UBool numeric, 35 const UChar *s, const UChar *p, const UChar *lim) 36 : CollationIterator(d, numeric), 37 start(s), pos(p), limit(lim) {} 38 39 UTF16CollationIterator(const UTF16CollationIterator &other, const UChar *newText); 40 41 virtual ~UTF16CollationIterator(); 42 43 virtual UBool operator==(const CollationIterator &other) const; 44 45 virtual void resetToOffset(int32_t newOffset); 46 47 virtual int32_t getOffset() const; 48 49 void setText(const UChar *s, const UChar *lim) { 50 reset(); 51 start = pos = s; 52 limit = lim; 53 } 54 55 virtual UChar32 nextCodePoint(UErrorCode &errorCode); 56 57 virtual UChar32 previousCodePoint(UErrorCode &errorCode); 58 59protected: 60 // Copy constructor only for subclasses which set the pointers. 61 UTF16CollationIterator(const UTF16CollationIterator &other) 62 : CollationIterator(other), 63 start(NULL), pos(NULL), limit(NULL) {} 64 65 virtual uint32_t handleNextCE32(UChar32 &c, UErrorCode &errorCode); 66 67 virtual UChar handleGetTrailSurrogate(); 68 69 virtual UBool foundNULTerminator(); 70 71 virtual void forwardNumCodePoints(int32_t num, UErrorCode &errorCode); 72 73 virtual void backwardNumCodePoints(int32_t num, UErrorCode &errorCode); 74 75 // UTF-16 string pointers. 76 // limit can be NULL for NUL-terminated strings. 77 const UChar *start, *pos, *limit; 78}; 79 80/** 81 * Incrementally checks the input text for FCD and normalizes where necessary. 82 */ 83class U_I18N_API FCDUTF16CollationIterator : public UTF16CollationIterator { 84public: 85 FCDUTF16CollationIterator(const CollationData *data, UBool numeric, 86 const UChar *s, const UChar *p, const UChar *lim) 87 : UTF16CollationIterator(data, numeric, s, p, lim), 88 rawStart(s), segmentStart(p), segmentLimit(NULL), rawLimit(lim), 89 nfcImpl(data->nfcImpl), 90 checkDir(1) {} 91 92 FCDUTF16CollationIterator(const FCDUTF16CollationIterator &other, const UChar *newText); 93 94 virtual ~FCDUTF16CollationIterator(); 95 96 virtual UBool operator==(const CollationIterator &other) const; 97 98 virtual void resetToOffset(int32_t newOffset); 99 100 virtual int32_t getOffset() const; 101 102 virtual UChar32 nextCodePoint(UErrorCode &errorCode); 103 104 virtual UChar32 previousCodePoint(UErrorCode &errorCode); 105 106protected: 107 virtual uint32_t handleNextCE32(UChar32 &c, UErrorCode &errorCode); 108 109 virtual UBool foundNULTerminator(); 110 111 virtual void forwardNumCodePoints(int32_t num, UErrorCode &errorCode); 112 113 virtual void backwardNumCodePoints(int32_t num, UErrorCode &errorCode); 114 115private: 116 /** 117 * Switches to forward checking if possible. 118 * To be called when checkDir < 0 || (checkDir == 0 && pos == limit). 119 * Returns with checkDir > 0 || (checkDir == 0 && pos != limit). 120 */ 121 void switchToForward(); 122 123 /** 124 * Extend the FCD text segment forward or normalize around pos. 125 * To be called when checkDir > 0 && pos != limit. 126 * @return TRUE if success, checkDir == 0 and pos != limit 127 */ 128 UBool nextSegment(UErrorCode &errorCode); 129 130 /** 131 * Switches to backward checking. 132 * To be called when checkDir > 0 || (checkDir == 0 && pos == start). 133 * Returns with checkDir < 0 || (checkDir == 0 && pos != start). 134 */ 135 void switchToBackward(); 136 137 /** 138 * Extend the FCD text segment backward or normalize around pos. 139 * To be called when checkDir < 0 && pos != start. 140 * @return TRUE if success, checkDir == 0 and pos != start 141 */ 142 UBool previousSegment(UErrorCode &errorCode); 143 144 UBool normalize(const UChar *from, const UChar *to, UErrorCode &errorCode); 145 146 // Text pointers: The input text is [rawStart, rawLimit[ 147 // where rawLimit can be NULL for NUL-terminated text. 148 // 149 // checkDir > 0: 150 // 151 // The input text [segmentStart..pos[ passes the FCD check. 152 // Moving forward checks incrementally. 153 // segmentLimit is undefined. limit == rawLimit. 154 // 155 // checkDir < 0: 156 // The input text [pos..segmentLimit[ passes the FCD check. 157 // Moving backward checks incrementally. 158 // segmentStart is undefined, start == rawStart. 159 // 160 // checkDir == 0: 161 // 162 // The input text [segmentStart..segmentLimit[ is being processed. 163 // These pointers are at FCD boundaries. 164 // Either this text segment already passes the FCD check 165 // and segmentStart==start<=pos<=limit==segmentLimit, 166 // or the current segment had to be normalized so that 167 // [segmentStart..segmentLimit[ turned into the normalized string, 168 // corresponding to normalized.getBuffer()==start<=pos<=limit==start+normalized.length(). 169 const UChar *rawStart; 170 const UChar *segmentStart; 171 const UChar *segmentLimit; 172 // rawLimit==NULL for a NUL-terminated string. 173 const UChar *rawLimit; 174 175 const Normalizer2Impl &nfcImpl; 176 UnicodeString normalized; 177 // Direction of incremental FCD check. See comments before rawStart. 178 int8_t checkDir; 179}; 180 181U_NAMESPACE_END 182 183#endif // !UCONFIG_NO_COLLATION 184#endif // __UTF16COLLATIONITERATOR_H__ 185