1/*
2*******************************************************************************
3* Copyright (C) 2010-2014, International Business Machines
4* Corporation and others.  All Rights Reserved.
5*******************************************************************************
6* utf16collationiterator.h
7*
8* created on: 2010oct27
9* created by: Markus W. Scherer
10*/
11
12#ifndef __UTF16COLLATIONITERATOR_H__
13#define __UTF16COLLATIONITERATOR_H__
14
15#include "unicode/utypes.h"
16
17#if !UCONFIG_NO_COLLATION
18
19#include "cmemory.h"
20#include "collation.h"
21#include "collationdata.h"
22#include "collationiterator.h"
23#include "normalizer2impl.h"
24
25U_NAMESPACE_BEGIN
26
27/**
28 * UTF-16 collation element and character iterator.
29 * Handles normalized UTF-16 text inline, with length or NUL-terminated.
30 * Unnormalized text is handled by a subclass.
31 */
32class U_I18N_API UTF16CollationIterator : public CollationIterator {
33public:
34    UTF16CollationIterator(const CollationData *d, UBool numeric,
35                           const UChar *s, const UChar *p, const UChar *lim)
36            : CollationIterator(d, numeric),
37              start(s), pos(p), limit(lim) {}
38
39    UTF16CollationIterator(const UTF16CollationIterator &other, const UChar *newText);
40
41    virtual ~UTF16CollationIterator();
42
43    virtual UBool operator==(const CollationIterator &other) const;
44
45    virtual void resetToOffset(int32_t newOffset);
46
47    virtual int32_t getOffset() const;
48
49    void setText(const UChar *s, const UChar *lim) {
50        reset();
51        start = pos = s;
52        limit = lim;
53    }
54
55    virtual UChar32 nextCodePoint(UErrorCode &errorCode);
56
57    virtual UChar32 previousCodePoint(UErrorCode &errorCode);
58
59protected:
60    // Copy constructor only for subclasses which set the pointers.
61    UTF16CollationIterator(const UTF16CollationIterator &other)
62            : CollationIterator(other),
63              start(NULL), pos(NULL), limit(NULL) {}
64
65    virtual uint32_t handleNextCE32(UChar32 &c, UErrorCode &errorCode);
66
67    virtual UChar handleGetTrailSurrogate();
68
69    virtual UBool foundNULTerminator();
70
71    virtual void forwardNumCodePoints(int32_t num, UErrorCode &errorCode);
72
73    virtual void backwardNumCodePoints(int32_t num, UErrorCode &errorCode);
74
75    // UTF-16 string pointers.
76    // limit can be NULL for NUL-terminated strings.
77    const UChar *start, *pos, *limit;
78};
79
80/**
81 * Incrementally checks the input text for FCD and normalizes where necessary.
82 */
83class U_I18N_API FCDUTF16CollationIterator : public UTF16CollationIterator {
84public:
85    FCDUTF16CollationIterator(const CollationData *data, UBool numeric,
86                              const UChar *s, const UChar *p, const UChar *lim)
87            : UTF16CollationIterator(data, numeric, s, p, lim),
88              rawStart(s), segmentStart(p), segmentLimit(NULL), rawLimit(lim),
89              nfcImpl(data->nfcImpl),
90              checkDir(1) {}
91
92    FCDUTF16CollationIterator(const FCDUTF16CollationIterator &other, const UChar *newText);
93
94    virtual ~FCDUTF16CollationIterator();
95
96    virtual UBool operator==(const CollationIterator &other) const;
97
98    virtual void resetToOffset(int32_t newOffset);
99
100    virtual int32_t getOffset() const;
101
102    virtual UChar32 nextCodePoint(UErrorCode &errorCode);
103
104    virtual UChar32 previousCodePoint(UErrorCode &errorCode);
105
106protected:
107    virtual uint32_t handleNextCE32(UChar32 &c, UErrorCode &errorCode);
108
109    virtual UBool foundNULTerminator();
110
111    virtual void forwardNumCodePoints(int32_t num, UErrorCode &errorCode);
112
113    virtual void backwardNumCodePoints(int32_t num, UErrorCode &errorCode);
114
115private:
116    /**
117     * Switches to forward checking if possible.
118     * To be called when checkDir < 0 || (checkDir == 0 && pos == limit).
119     * Returns with checkDir > 0 || (checkDir == 0 && pos != limit).
120     */
121    void switchToForward();
122
123    /**
124     * Extend the FCD text segment forward or normalize around pos.
125     * To be called when checkDir > 0 && pos != limit.
126     * @return TRUE if success, checkDir == 0 and pos != limit
127     */
128    UBool nextSegment(UErrorCode &errorCode);
129
130    /**
131     * Switches to backward checking.
132     * To be called when checkDir > 0 || (checkDir == 0 && pos == start).
133     * Returns with checkDir < 0 || (checkDir == 0 && pos != start).
134     */
135    void switchToBackward();
136
137    /**
138     * Extend the FCD text segment backward or normalize around pos.
139     * To be called when checkDir < 0 && pos != start.
140     * @return TRUE if success, checkDir == 0 and pos != start
141     */
142    UBool previousSegment(UErrorCode &errorCode);
143
144    UBool normalize(const UChar *from, const UChar *to, UErrorCode &errorCode);
145
146    // Text pointers: The input text is [rawStart, rawLimit[
147    // where rawLimit can be NULL for NUL-terminated text.
148    //
149    // checkDir > 0:
150    //
151    // The input text [segmentStart..pos[ passes the FCD check.
152    // Moving forward checks incrementally.
153    // segmentLimit is undefined. limit == rawLimit.
154    //
155    // checkDir < 0:
156    // The input text [pos..segmentLimit[ passes the FCD check.
157    // Moving backward checks incrementally.
158    // segmentStart is undefined, start == rawStart.
159    //
160    // checkDir == 0:
161    //
162    // The input text [segmentStart..segmentLimit[ is being processed.
163    // These pointers are at FCD boundaries.
164    // Either this text segment already passes the FCD check
165    // and segmentStart==start<=pos<=limit==segmentLimit,
166    // or the current segment had to be normalized so that
167    // [segmentStart..segmentLimit[ turned into the normalized string,
168    // corresponding to normalized.getBuffer()==start<=pos<=limit==start+normalized.length().
169    const UChar *rawStart;
170    const UChar *segmentStart;
171    const UChar *segmentLimit;
172    // rawLimit==NULL for a NUL-terminated string.
173    const UChar *rawLimit;
174
175    const Normalizer2Impl &nfcImpl;
176    UnicodeString normalized;
177    // Direction of incremental FCD check. See comments before rawStart.
178    int8_t checkDir;
179};
180
181U_NAMESPACE_END
182
183#endif  // !UCONFIG_NO_COLLATION
184#endif  // __UTF16COLLATIONITERATOR_H__
185