// Copyright (C) 2016 and later: Unicode, Inc. and others.
// License & terms of use: http://www.unicode.org/copyright.html
/*
*******************************************************************************
* Copyright (C) 2012-2016, International Business Machines
* Corporation and others.  All Rights Reserved.
*******************************************************************************
* uitercollationiterator.h
*
* created on: 2012sep23 (from utf16collationiterator.h)
* created by: Markus W. Scherer
*/

14fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius#ifndef __UITERCOLLATIONITERATOR_H__
15fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius#define __UITERCOLLATIONITERATOR_H__
16fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius
17fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius#include "unicode/utypes.h"
18fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius
19fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius#if !UCONFIG_NO_COLLATION
20fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius
21fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius#include "unicode/uiter.h"
22fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius#include "cmemory.h"
23fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius#include "collation.h"
24fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius#include "collationdata.h"
258de051c3d18a56cc126f0f44e368495a52f9148cFredrik Roubert#include "collationiterator.h"
26fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius#include "normalizer2impl.h"
27fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius
28fceb39872958b9fa2505e63f8b8699a9e0f882f4ccorneliusU_NAMESPACE_BEGIN
29fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius
30fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius/**
31fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius * UCharIterator-based collation element and character iterator.
32fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius * Handles normalized text inline, with length or NUL-terminated.
33fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius * Unnormalized text is handled by a subclass.
34fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius */
35fceb39872958b9fa2505e63f8b8699a9e0f882f4ccorneliusclass U_I18N_API UIterCollationIterator : public CollationIterator {
36fceb39872958b9fa2505e63f8b8699a9e0f882f4ccorneliuspublic:
37fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius    UIterCollationIterator(const CollationData *d, UBool numeric, UCharIterator &ui)
38fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius            : CollationIterator(d, numeric), iter(ui) {}
39fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius
40fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius    virtual ~UIterCollationIterator();
41fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius
42fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius    virtual void resetToOffset(int32_t newOffset);
43fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius
44fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius    virtual int32_t getOffset() const;
45fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius
46fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius    virtual UChar32 nextCodePoint(UErrorCode &errorCode);
47fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius
48fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius    virtual UChar32 previousCodePoint(UErrorCode &errorCode);
49fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius
50fceb39872958b9fa2505e63f8b8699a9e0f882f4ccorneliusprotected:
51fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius    virtual uint32_t handleNextCE32(UChar32 &c, UErrorCode &errorCode);
52fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius
53fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius    virtual UChar handleGetTrailSurrogate();
54fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius
55fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius    virtual void forwardNumCodePoints(int32_t num, UErrorCode &errorCode);
56fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius
57fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius    virtual void backwardNumCodePoints(int32_t num, UErrorCode &errorCode);
58fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius
59fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius    UCharIterator &iter;
60fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius};
61fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius
62fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius/**
63fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius * Incrementally checks the input text for FCD and normalizes where necessary.
64fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius */
65fceb39872958b9fa2505e63f8b8699a9e0f882f4ccorneliusclass U_I18N_API FCDUIterCollationIterator : public UIterCollationIterator {
66fceb39872958b9fa2505e63f8b8699a9e0f882f4ccorneliuspublic:
67fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius    FCDUIterCollationIterator(const CollationData *data, UBool numeric, UCharIterator &ui, int32_t startIndex)
68fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius            : UIterCollationIterator(data, numeric, ui),
69fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius              state(ITER_CHECK_FWD), start(startIndex),
70fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius              nfcImpl(data->nfcImpl) {}
71fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius
72fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius    virtual ~FCDUIterCollationIterator();
73fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius
74fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius    virtual void resetToOffset(int32_t newOffset);
75fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius
76fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius    virtual int32_t getOffset() const;
77fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius
78fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius    virtual UChar32 nextCodePoint(UErrorCode &errorCode);
79fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius
80fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius    virtual UChar32 previousCodePoint(UErrorCode &errorCode);
81fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius
82fceb39872958b9fa2505e63f8b8699a9e0f882f4ccorneliusprotected:
83fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius    virtual uint32_t handleNextCE32(UChar32 &c, UErrorCode &errorCode);
84fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius
85fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius    virtual UChar handleGetTrailSurrogate();
86fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius
87fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius    virtual void forwardNumCodePoints(int32_t num, UErrorCode &errorCode);
88fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius
89fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius    virtual void backwardNumCodePoints(int32_t num, UErrorCode &errorCode);
90fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius
91fceb39872958b9fa2505e63f8b8699a9e0f882f4ccorneliusprivate:
92fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius    /**
93fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius     * Switches to forward checking if possible.
94fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius     */
95fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius    void switchToForward();
96fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius
97fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius    /**
98fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius     * Extends the FCD text segment forward or normalizes around pos.
99fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius     * @return TRUE if success
100fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius     */
101fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius    UBool nextSegment(UErrorCode &errorCode);
102fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius
103fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius    /**
104fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius     * Switches to backward checking.
105fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius     */
106fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius    void switchToBackward();
107fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius
108fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius    /**
109fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius     * Extends the FCD text segment backward or normalizes around pos.
110fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius     * @return TRUE if success
111fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius     */
112fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius    UBool previousSegment(UErrorCode &errorCode);
113fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius
114fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius    UBool normalize(const UnicodeString &s, UErrorCode &errorCode);
115fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius
116fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius    enum State {
117fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius        /**
118fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius         * The input text [start..(iter index)[ passes the FCD check.
119fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius         * Moving forward checks incrementally.
120fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius         * pos & limit are undefined.
121fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius         */
122fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius        ITER_CHECK_FWD,
123fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius        /**
124fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius         * The input text [(iter index)..limit[ passes the FCD check.
125fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius         * Moving backward checks incrementally.
126fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius         * start & pos are undefined.
127fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius         */
128fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius        ITER_CHECK_BWD,
129fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius        /**
130fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius         * The input text [start..limit[ passes the FCD check.
131fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius         * pos tracks the current text index.
132fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius         */
133fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius        ITER_IN_FCD_SEGMENT,
134fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius        /**
135fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius         * The input text [start..limit[ failed the FCD check and was normalized.
136fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius         * pos tracks the current index in the normalized string.
137fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius         * The text iterator is at the limit index.
138fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius         */
139fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius        IN_NORM_ITER_AT_LIMIT,
140fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius        /**
141fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius         * The input text [start..limit[ failed the FCD check and was normalized.
142fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius         * pos tracks the current index in the normalized string.
143fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius         * The text iterator is at the start index.
144fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius         */
145fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius        IN_NORM_ITER_AT_START
146fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius    };
147fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius
148fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius    State state;
149fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius
150fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius    int32_t start;
151fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius    int32_t pos;
152fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius    int32_t limit;
153fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius
154fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius    const Normalizer2Impl &nfcImpl;
155fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius    UnicodeString normalized;
156fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius};
157fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius
158fceb39872958b9fa2505e63f8b8699a9e0f882f4ccorneliusU_NAMESPACE_END
159fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius
160fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius#endif  // !UCONFIG_NO_COLLATION
161fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius#endif  // __UITERCOLLATIONITERATOR_H__
162