/*
*******************************************************************************
* Copyright (C) 2010-2014, International Business Machines
* Corporation and others.  All Rights Reserved.
*******************************************************************************
* utf16collationiterator.h
*
* created on: 2010oct27
* created by: Markus W. Scherer
*/
11fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius
12fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius#ifndef __UTF16COLLATIONITERATOR_H__
13fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius#define __UTF16COLLATIONITERATOR_H__
14fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius
15fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius#include "unicode/utypes.h"
16fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius
17fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius#if !UCONFIG_NO_COLLATION
18fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius
19fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius#include "cmemory.h"
20fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius#include "collation.h"
21fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius#include "collationdata.h"
22fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius#include "collationiterator.h"
23fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius#include "normalizer2impl.h"
24fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius
25fceb39872958b9fa2505e63f8b8699a9e0f882f4ccorneliusU_NAMESPACE_BEGIN
26fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius
27fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius/**
28fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius * UTF-16 collation element and character iterator.
29fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius * Handles normalized UTF-16 text inline, with length or NUL-terminated.
30fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius * Unnormalized text is handled by a subclass.
31fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius */
32fceb39872958b9fa2505e63f8b8699a9e0f882f4ccorneliusclass U_I18N_API UTF16CollationIterator : public CollationIterator {
33fceb39872958b9fa2505e63f8b8699a9e0f882f4ccorneliuspublic:
34fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius    UTF16CollationIterator(const CollationData *d, UBool numeric,
35fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius                           const UChar *s, const UChar *p, const UChar *lim)
36fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius            : CollationIterator(d, numeric),
37fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius              start(s), pos(p), limit(lim) {}
38fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius
39fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius    UTF16CollationIterator(const UTF16CollationIterator &other, const UChar *newText);
40fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius
41fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius    virtual ~UTF16CollationIterator();
42fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius
43fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius    virtual UBool operator==(const CollationIterator &other) const;
44fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius
45fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius    virtual void resetToOffset(int32_t newOffset);
46fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius
47fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius    virtual int32_t getOffset() const;
48fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius
49fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius    void setText(const UChar *s, const UChar *lim) {
50fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius        reset();
51fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius        start = pos = s;
52fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius        limit = lim;
53fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius    }
54fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius
55fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius    virtual UChar32 nextCodePoint(UErrorCode &errorCode);
56fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius
57fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius    virtual UChar32 previousCodePoint(UErrorCode &errorCode);
58fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius
59fceb39872958b9fa2505e63f8b8699a9e0f882f4ccorneliusprotected:
60fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius    // Copy constructor only for subclasses which set the pointers.
61fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius    UTF16CollationIterator(const UTF16CollationIterator &other)
62fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius            : CollationIterator(other),
63fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius              start(NULL), pos(NULL), limit(NULL) {}
64fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius
65fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius    virtual uint32_t handleNextCE32(UChar32 &c, UErrorCode &errorCode);
66fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius
67fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius    virtual UChar handleGetTrailSurrogate();
68fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius
69fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius    virtual UBool foundNULTerminator();
70fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius
71fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius    virtual void forwardNumCodePoints(int32_t num, UErrorCode &errorCode);
72fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius
73fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius    virtual void backwardNumCodePoints(int32_t num, UErrorCode &errorCode);
74fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius
75fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius    // UTF-16 string pointers.
76fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius    // limit can be NULL for NUL-terminated strings.
77fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius    const UChar *start, *pos, *limit;
78fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius};
79fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius
80fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius/**
81fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius * Incrementally checks the input text for FCD and normalizes where necessary.
82fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius */
83fceb39872958b9fa2505e63f8b8699a9e0f882f4ccorneliusclass U_I18N_API FCDUTF16CollationIterator : public UTF16CollationIterator {
84fceb39872958b9fa2505e63f8b8699a9e0f882f4ccorneliuspublic:
85fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius    FCDUTF16CollationIterator(const CollationData *data, UBool numeric,
86fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius                              const UChar *s, const UChar *p, const UChar *lim)
87fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius            : UTF16CollationIterator(data, numeric, s, p, lim),
88fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius              rawStart(s), segmentStart(p), segmentLimit(NULL), rawLimit(lim),
89fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius              nfcImpl(data->nfcImpl),
90fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius              checkDir(1) {}
91fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius
92fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius    FCDUTF16CollationIterator(const FCDUTF16CollationIterator &other, const UChar *newText);
93fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius
94fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius    virtual ~FCDUTF16CollationIterator();
95fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius
96fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius    virtual UBool operator==(const CollationIterator &other) const;
97fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius
98fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius    virtual void resetToOffset(int32_t newOffset);
99fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius
100fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius    virtual int32_t getOffset() const;
101fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius
102fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius    virtual UChar32 nextCodePoint(UErrorCode &errorCode);
103fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius
104fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius    virtual UChar32 previousCodePoint(UErrorCode &errorCode);
105fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius
106fceb39872958b9fa2505e63f8b8699a9e0f882f4ccorneliusprotected:
107fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius    virtual uint32_t handleNextCE32(UChar32 &c, UErrorCode &errorCode);
108fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius
109fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius    virtual UBool foundNULTerminator();
110fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius
111fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius    virtual void forwardNumCodePoints(int32_t num, UErrorCode &errorCode);
112fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius
113fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius    virtual void backwardNumCodePoints(int32_t num, UErrorCode &errorCode);
114fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius
115fceb39872958b9fa2505e63f8b8699a9e0f882f4ccorneliusprivate:
116fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius    /**
117fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius     * Switches to forward checking if possible.
118fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius     * To be called when checkDir < 0 || (checkDir == 0 && pos == limit).
119fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius     * Returns with checkDir > 0 || (checkDir == 0 && pos != limit).
120fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius     */
121fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius    void switchToForward();
122fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius
123fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius    /**
124fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius     * Extend the FCD text segment forward or normalize around pos.
125fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius     * To be called when checkDir > 0 && pos != limit.
126fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius     * @return TRUE if success, checkDir == 0 and pos != limit
127fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius     */
128fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius    UBool nextSegment(UErrorCode &errorCode);
129fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius
130fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius    /**
131fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius     * Switches to backward checking.
132fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius     * To be called when checkDir > 0 || (checkDir == 0 && pos == start).
133fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius     * Returns with checkDir < 0 || (checkDir == 0 && pos != start).
134fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius     */
135fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius    void switchToBackward();
136fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius
137fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius    /**
138fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius     * Extend the FCD text segment backward or normalize around pos.
139fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius     * To be called when checkDir < 0 && pos != start.
140fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius     * @return TRUE if success, checkDir == 0 and pos != start
141fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius     */
142fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius    UBool previousSegment(UErrorCode &errorCode);
143fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius
144fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius    UBool normalize(const UChar *from, const UChar *to, UErrorCode &errorCode);
145fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius
146fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius    // Text pointers: The input text is [rawStart, rawLimit[
147fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius    // where rawLimit can be NULL for NUL-terminated text.
148fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius    //
149fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius    // checkDir > 0:
150fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius    //
151fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius    // The input text [segmentStart..pos[ passes the FCD check.
152fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius    // Moving forward checks incrementally.
153fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius    // segmentLimit is undefined. limit == rawLimit.
154fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius    //
155fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius    // checkDir < 0:
156fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius    // The input text [pos..segmentLimit[ passes the FCD check.
157fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius    // Moving backward checks incrementally.
158fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius    // segmentStart is undefined, start == rawStart.
159fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius    //
160fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius    // checkDir == 0:
161fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius    //
162fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius    // The input text [segmentStart..segmentLimit[ is being processed.
163fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius    // These pointers are at FCD boundaries.
164fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius    // Either this text segment already passes the FCD check
165fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius    // and segmentStart==start<=pos<=limit==segmentLimit,
166fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius    // or the current segment had to be normalized so that
167fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius    // [segmentStart..segmentLimit[ turned into the normalized string,
168fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius    // corresponding to normalized.getBuffer()==start<=pos<=limit==start+normalized.length().
169fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius    const UChar *rawStart;
170fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius    const UChar *segmentStart;
171fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius    const UChar *segmentLimit;
172fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius    // rawLimit==NULL for a NUL-terminated string.
173fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius    const UChar *rawLimit;
174fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius
175fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius    const Normalizer2Impl &nfcImpl;
176fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius    UnicodeString normalized;
177fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius    // Direction of incremental FCD check. See comments before rawStart.
178fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius    int8_t checkDir;
179fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius};
180fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius
181fceb39872958b9fa2505e63f8b8699a9e0f882f4ccorneliusU_NAMESPACE_END
182fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius
183fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius#endif  // !UCONFIG_NO_COLLATION
184fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius#endif  // __UTF16COLLATIONITERATOR_H__
185