1/*
2*******************************************************************************
3* Copyright (C) 2012-2014, International Business Machines
4* Corporation and others.  All Rights Reserved.
5*******************************************************************************
6* uitercollationiterator.h
7*
8* created on: 2012sep23 (from utf16collationiterator.h)
9* created by: Markus W. Scherer
10*/
11
12#ifndef __UITERCOLLATIONITERATOR_H__
13#define __UITERCOLLATIONITERATOR_H__
14
15#include "unicode/utypes.h"
16
17#if !UCONFIG_NO_COLLATION
18
19#include "unicode/uiter.h"
20#include "cmemory.h"
21#include "collation.h"
22#include "collationdata.h"
23#include "normalizer2impl.h"
24
25U_NAMESPACE_BEGIN
26
/**
 * UCharIterator-based collation element and character iterator.
 * Handles normalized text inline; the text is read through the UCharIterator
 * that is passed to the constructor.
 * Unnormalized text is handled by a subclass.
 */
32class U_I18N_API UIterCollationIterator : public CollationIterator {
33public:
34    UIterCollationIterator(const CollationData *d, UBool numeric, UCharIterator &ui)
35            : CollationIterator(d, numeric), iter(ui) {}
36
37    virtual ~UIterCollationIterator();
38
39    virtual void resetToOffset(int32_t newOffset);
40
41    virtual int32_t getOffset() const;
42
43    virtual UChar32 nextCodePoint(UErrorCode &errorCode);
44
45    virtual UChar32 previousCodePoint(UErrorCode &errorCode);
46
47protected:
48    virtual uint32_t handleNextCE32(UChar32 &c, UErrorCode &errorCode);
49
50    virtual UChar handleGetTrailSurrogate();
51
52    virtual void forwardNumCodePoints(int32_t num, UErrorCode &errorCode);
53
54    virtual void backwardNumCodePoints(int32_t num, UErrorCode &errorCode);
55
56    UCharIterator &iter;
57};
58
59/**
60 * Incrementally checks the input text for FCD and normalizes where necessary.
61 */
62class U_I18N_API FCDUIterCollationIterator : public UIterCollationIterator {
63public:
64    FCDUIterCollationIterator(const CollationData *data, UBool numeric, UCharIterator &ui, int32_t startIndex)
65            : UIterCollationIterator(data, numeric, ui),
66              state(ITER_CHECK_FWD), start(startIndex),
67              nfcImpl(data->nfcImpl) {}
68
69    virtual ~FCDUIterCollationIterator();
70
71    virtual void resetToOffset(int32_t newOffset);
72
73    virtual int32_t getOffset() const;
74
75    virtual UChar32 nextCodePoint(UErrorCode &errorCode);
76
77    virtual UChar32 previousCodePoint(UErrorCode &errorCode);
78
79protected:
80    virtual uint32_t handleNextCE32(UChar32 &c, UErrorCode &errorCode);
81
82    virtual UChar handleGetTrailSurrogate();
83
84    virtual void forwardNumCodePoints(int32_t num, UErrorCode &errorCode);
85
86    virtual void backwardNumCodePoints(int32_t num, UErrorCode &errorCode);
87
88private:
89    /**
90     * Switches to forward checking if possible.
91     */
92    void switchToForward();
93
94    /**
95     * Extends the FCD text segment forward or normalizes around pos.
96     * @return TRUE if success
97     */
98    UBool nextSegment(UErrorCode &errorCode);
99
100    /**
101     * Switches to backward checking.
102     */
103    void switchToBackward();
104
105    /**
106     * Extends the FCD text segment backward or normalizes around pos.
107     * @return TRUE if success
108     */
109    UBool previousSegment(UErrorCode &errorCode);
110
111    UBool normalize(const UnicodeString &s, UErrorCode &errorCode);
112
113    enum State {
114        /**
115         * The input text [start..(iter index)[ passes the FCD check.
116         * Moving forward checks incrementally.
117         * pos & limit are undefined.
118         */
119        ITER_CHECK_FWD,
120        /**
121         * The input text [(iter index)..limit[ passes the FCD check.
122         * Moving backward checks incrementally.
123         * start & pos are undefined.
124         */
125        ITER_CHECK_BWD,
126        /**
127         * The input text [start..limit[ passes the FCD check.
128         * pos tracks the current text index.
129         */
130        ITER_IN_FCD_SEGMENT,
131        /**
132         * The input text [start..limit[ failed the FCD check and was normalized.
133         * pos tracks the current index in the normalized string.
134         * The text iterator is at the limit index.
135         */
136        IN_NORM_ITER_AT_LIMIT,
137        /**
138         * The input text [start..limit[ failed the FCD check and was normalized.
139         * pos tracks the current index in the normalized string.
140         * The text iterator is at the start index.
141         */
142        IN_NORM_ITER_AT_START
143    };
144
145    State state;
146
147    int32_t start;
148    int32_t pos;
149    int32_t limit;
150
151    const Normalizer2Impl &nfcImpl;
152    UnicodeString normalized;
153};
154
155U_NAMESPACE_END
156
157#endif  // !UCONFIG_NO_COLLATION
158#endif  // __UITERCOLLATIONITERATOR_H__
159