1// Copyright (C) 2016 and later: Unicode, Inc. and others.
2// License & terms of use: http://www.unicode.org/copyright.html
3/*
4*******************************************************************************
5* Copyright (C) 2012-2016, International Business Machines
6* Corporation and others.  All Rights Reserved.
7*******************************************************************************
8* uitercollationiterator.h
9*
10* created on: 2012sep23 (from utf16collationiterator.h)
11* created by: Markus W. Scherer
12*/
13
14#ifndef __UITERCOLLATIONITERATOR_H__
15#define __UITERCOLLATIONITERATOR_H__
16
17#include "unicode/utypes.h"
18
19#if !UCONFIG_NO_COLLATION
20
21#include "unicode/uiter.h"
22#include "cmemory.h"
23#include "collation.h"
24#include "collationdata.h"
25#include "collationiterator.h"
26#include "normalizer2impl.h"
27
28U_NAMESPACE_BEGIN
29
30/**
31 * UCharIterator-based collation element and character iterator.
32 * Handles normalized text inline, with length or NUL-terminated.
33 * Unnormalized text is handled by a subclass.
34 */
35class U_I18N_API UIterCollationIterator : public CollationIterator {
36public:
37    UIterCollationIterator(const CollationData *d, UBool numeric, UCharIterator &ui)
38            : CollationIterator(d, numeric), iter(ui) {}
39
40    virtual ~UIterCollationIterator();
41
42    virtual void resetToOffset(int32_t newOffset);
43
44    virtual int32_t getOffset() const;
45
46    virtual UChar32 nextCodePoint(UErrorCode &errorCode);
47
48    virtual UChar32 previousCodePoint(UErrorCode &errorCode);
49
50protected:
51    virtual uint32_t handleNextCE32(UChar32 &c, UErrorCode &errorCode);
52
53    virtual UChar handleGetTrailSurrogate();
54
55    virtual void forwardNumCodePoints(int32_t num, UErrorCode &errorCode);
56
57    virtual void backwardNumCodePoints(int32_t num, UErrorCode &errorCode);
58
59    UCharIterator &iter;
60};
61
62/**
63 * Incrementally checks the input text for FCD and normalizes where necessary.
64 */
65class U_I18N_API FCDUIterCollationIterator : public UIterCollationIterator {
66public:
67    FCDUIterCollationIterator(const CollationData *data, UBool numeric, UCharIterator &ui, int32_t startIndex)
68            : UIterCollationIterator(data, numeric, ui),
69              state(ITER_CHECK_FWD), start(startIndex),
70              nfcImpl(data->nfcImpl) {}
71
72    virtual ~FCDUIterCollationIterator();
73
74    virtual void resetToOffset(int32_t newOffset);
75
76    virtual int32_t getOffset() const;
77
78    virtual UChar32 nextCodePoint(UErrorCode &errorCode);
79
80    virtual UChar32 previousCodePoint(UErrorCode &errorCode);
81
82protected:
83    virtual uint32_t handleNextCE32(UChar32 &c, UErrorCode &errorCode);
84
85    virtual UChar handleGetTrailSurrogate();
86
87    virtual void forwardNumCodePoints(int32_t num, UErrorCode &errorCode);
88
89    virtual void backwardNumCodePoints(int32_t num, UErrorCode &errorCode);
90
91private:
92    /**
93     * Switches to forward checking if possible.
94     */
95    void switchToForward();
96
97    /**
98     * Extends the FCD text segment forward or normalizes around pos.
99     * @return TRUE if success
100     */
101    UBool nextSegment(UErrorCode &errorCode);
102
103    /**
104     * Switches to backward checking.
105     */
106    void switchToBackward();
107
108    /**
109     * Extends the FCD text segment backward or normalizes around pos.
110     * @return TRUE if success
111     */
112    UBool previousSegment(UErrorCode &errorCode);
113
114    UBool normalize(const UnicodeString &s, UErrorCode &errorCode);
115
116    enum State {
117        /**
118         * The input text [start..(iter index)[ passes the FCD check.
119         * Moving forward checks incrementally.
120         * pos & limit are undefined.
121         */
122        ITER_CHECK_FWD,
123        /**
124         * The input text [(iter index)..limit[ passes the FCD check.
125         * Moving backward checks incrementally.
126         * start & pos are undefined.
127         */
128        ITER_CHECK_BWD,
129        /**
130         * The input text [start..limit[ passes the FCD check.
131         * pos tracks the current text index.
132         */
133        ITER_IN_FCD_SEGMENT,
134        /**
135         * The input text [start..limit[ failed the FCD check and was normalized.
136         * pos tracks the current index in the normalized string.
137         * The text iterator is at the limit index.
138         */
139        IN_NORM_ITER_AT_LIMIT,
140        /**
141         * The input text [start..limit[ failed the FCD check and was normalized.
142         * pos tracks the current index in the normalized string.
143         * The text iterator is at the start index.
144         */
145        IN_NORM_ITER_AT_START
146    };
147
148    State state;
149
150    int32_t start;
151    int32_t pos;
152    int32_t limit;
153
154    const Normalizer2Impl &nfcImpl;
155    UnicodeString normalized;
156};
157
158U_NAMESPACE_END
159
160#endif  // !UCONFIG_NO_COLLATION
161#endif  // __UITERCOLLATIONITERATOR_H__
162