/*
*******************************************************************************
* Copyright (C) 2010-2014, International Business Machines
* Corporation and others.  All Rights Reserved.
*******************************************************************************
* collationiterator.h
*
* created on: 2010oct27
* created by: Markus W. Scherer
*/
11fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius
12fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius#ifndef __COLLATIONITERATOR_H__
13fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius#define __COLLATIONITERATOR_H__
14fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius
15fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius#include "unicode/utypes.h"
16fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius
17fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius#if !UCONFIG_NO_COLLATION
18fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius
19fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius#include "cmemory.h"
20fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius#include "collation.h"
21fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius#include "collationdata.h"
22fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius
23fceb39872958b9fa2505e63f8b8699a9e0f882f4ccorneliusU_NAMESPACE_BEGIN
24fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius
25fceb39872958b9fa2505e63f8b8699a9e0f882f4ccorneliusclass SkippedState;
26fceb39872958b9fa2505e63f8b8699a9e0f882f4ccorneliusclass UCharsTrie;
27fceb39872958b9fa2505e63f8b8699a9e0f882f4ccorneliusclass UVector32;
28fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius
/**
 * Collation element iterator and abstract character iterator.
 *
 * When a method returns a code point value, that value must be in 0..10FFFF,
 * except that it can be negative when used as a sentinel value.
 */
35fceb39872958b9fa2505e63f8b8699a9e0f882f4ccorneliusclass U_I18N_API CollationIterator : public UObject {
36fceb39872958b9fa2505e63f8b8699a9e0f882f4ccorneliusprivate:
37fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius    class CEBuffer {
38fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius    private:
39fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius        /** Large enough for CEs of most short strings. */
40fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius        static const int32_t INITIAL_CAPACITY = 40;
41fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius    public:
42fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius        CEBuffer() : length(0) {}
43fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius        ~CEBuffer();
44fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius
45fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius        inline void append(int64_t ce, UErrorCode &errorCode) {
46fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius            if(length < INITIAL_CAPACITY || ensureAppendCapacity(1, errorCode)) {
47fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius                buffer[length++] = ce;
48fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius            }
49fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius        }
50fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius
51fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius        inline void appendUnsafe(int64_t ce) {
52fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius            buffer[length++] = ce;
53fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius        }
54fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius
55fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius        UBool ensureAppendCapacity(int32_t appCap, UErrorCode &errorCode);
56fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius
57fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius        inline UBool incLength(UErrorCode &errorCode) {
58fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius            // Use INITIAL_CAPACITY for a very simple fastpath.
59fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius            // (Rather than buffer.getCapacity().)
60fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius            if(length < INITIAL_CAPACITY || ensureAppendCapacity(1, errorCode)) {
61fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius                ++length;
62fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius                return TRUE;
63fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius            } else {
64fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius                return FALSE;
65fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius            }
66fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius        }
67fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius
68fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius        inline int64_t set(int32_t i, int64_t ce) {
69fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius            return buffer[i] = ce;
70fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius        }
71fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius        inline int64_t get(int32_t i) const { return buffer[i]; }
72fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius
73fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius        const int64_t *getCEs() const { return buffer.getAlias(); }
74fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius
75fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius        int32_t length;
76fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius
77fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius    private:
78fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius        CEBuffer(const CEBuffer &);
79fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius        void operator=(const CEBuffer &);
80fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius
81fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius        MaybeStackArray<int64_t, INITIAL_CAPACITY> buffer;
82fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius    };
83fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius
84fceb39872958b9fa2505e63f8b8699a9e0f882f4ccorneliuspublic:
85fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius    CollationIterator(const CollationData *d, UBool numeric)
86fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius            : trie(d->trie),
87fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius              data(d),
88fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius              cesIndex(0),
89fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius              skipped(NULL),
90fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius              numCpFwd(-1),
91fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius              isNumeric(numeric) {}
92fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius
93fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius    virtual ~CollationIterator();
94fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius
95fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius    virtual UBool operator==(const CollationIterator &other) const;
96fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius    inline UBool operator!=(const CollationIterator &other) const {
97fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius        return !operator==(other);
98fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius    }
99fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius
100fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius    /**
101fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius     * Resets the iterator state and sets the position to the specified offset.
102fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius     * Subclasses must implement, and must call the parent class method,
103fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius     * or CollationIterator::reset().
104fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius     */
105fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius    virtual void resetToOffset(int32_t newOffset) = 0;
106fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius
107fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius    virtual int32_t getOffset() const = 0;
108fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius
109fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius    /**
110fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius     * Returns the next collation element.
111fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius     */
112fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius    inline int64_t nextCE(UErrorCode &errorCode) {
113fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius        if(cesIndex < ceBuffer.length) {
114fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius            // Return the next buffered CE.
115fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius            return ceBuffer.get(cesIndex++);
116fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius        }
117fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius        // assert cesIndex == ceBuffer.length;
118fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius        if(!ceBuffer.incLength(errorCode)) {
119fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius            return Collation::NO_CE;
120fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius        }
121fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius        UChar32 c;
122fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius        uint32_t ce32 = handleNextCE32(c, errorCode);
123fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius        uint32_t t = ce32 & 0xff;
124fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius        if(t < Collation::SPECIAL_CE32_LOW_BYTE) {  // Forced-inline of isSpecialCE32(ce32).
125fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius            // Normal CE from the main data.
126fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius            // Forced-inline of ceFromSimpleCE32(ce32).
127fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius            return ceBuffer.set(cesIndex++,
128fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius                    ((int64_t)(ce32 & 0xffff0000) << 32) | ((ce32 & 0xff00) << 16) | (t << 8));
129fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius        }
130fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius        const CollationData *d;
131fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius        // The compiler should be able to optimize the previous and the following
132fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius        // comparisons of t with the same constant.
133fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius        if(t == Collation::SPECIAL_CE32_LOW_BYTE) {
134fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius            if(c < 0) {
135fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius                return ceBuffer.set(cesIndex++, Collation::NO_CE);
136fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius            }
137fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius            d = data->base;
138fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius            ce32 = d->getCE32(c);
139fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius            t = ce32 & 0xff;
140fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius            if(t < Collation::SPECIAL_CE32_LOW_BYTE) {
141fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius                // Normal CE from the base data.
142fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius                return ceBuffer.set(cesIndex++,
143fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius                        ((int64_t)(ce32 & 0xffff0000) << 32) | ((ce32 & 0xff00) << 16) | (t << 8));
144fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius            }
145fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius        } else {
146fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius            d = data;
147fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius        }
148fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius        if(t == Collation::LONG_PRIMARY_CE32_LOW_BYTE) {
149fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius            // Forced-inline of ceFromLongPrimaryCE32(ce32).
150fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius            return ceBuffer.set(cesIndex++,
151fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius                    ((int64_t)(ce32 - t) << 32) | Collation::COMMON_SEC_AND_TER_CE);
152fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius        }
153fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius        return nextCEFromCE32(d, c, ce32, errorCode);
154fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius    }
155fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius
156fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius    /**
157fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius     * Fetches all CEs.
158fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius     * @return getCEsLength()
159fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius     */
160fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius    int32_t fetchCEs(UErrorCode &errorCode);
161fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius
162fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius    /**
163fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius     * Overwrites the current CE (the last one returned by nextCE()).
164fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius     */
165fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius    void setCurrentCE(int64_t ce) {
166fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius        // assert cesIndex > 0;
167fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius        ceBuffer.set(cesIndex - 1, ce);
168fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius    }
169fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius
170fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius    /**
171fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius     * Returns the previous collation element.
172fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius     */
173fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius    int64_t previousCE(UVector32 &offsets, UErrorCode &errorCode);
174fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius
175fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius    inline int32_t getCEsLength() const {
176fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius        return ceBuffer.length;
177fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius    }
178fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius
179fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius    inline int64_t getCE(int32_t i) const {
180fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius        return ceBuffer.get(i);
181fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius    }
182fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius
183fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius    const int64_t *getCEs() const {
184fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius        return ceBuffer.getCEs();
185fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius    }
186fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius
187fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius    void clearCEs() {
188fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius        cesIndex = ceBuffer.length = 0;
189fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius    }
190fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius
191fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius    void clearCEsIfNoneRemaining() {
192fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius        if(cesIndex == ceBuffer.length) { clearCEs(); }
193fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius    }
194fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius
195fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius    /**
196fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius     * Returns the next code point (with post-increment).
197fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius     * Public for identical-level comparison and for testing.
198fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius     */
199fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius    virtual UChar32 nextCodePoint(UErrorCode &errorCode) = 0;
200fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius
201fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius    /**
202fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius     * Returns the previous code point (with pre-decrement).
203fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius     * Public for identical-level comparison and for testing.
204fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius     */
205fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius    virtual UChar32 previousCodePoint(UErrorCode &errorCode) = 0;
206fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius
207fceb39872958b9fa2505e63f8b8699a9e0f882f4ccorneliusprotected:
208fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius    CollationIterator(const CollationIterator &other);
209fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius
210fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius    void reset();
211fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius
212fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius    /**
213fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius     * Returns the next code point and its local CE32 value.
214fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius     * Returns Collation::FALLBACK_CE32 at the end of the text (c<0)
215fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius     * or when c's CE32 value is to be looked up in the base data (fallback).
216fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius     *
217fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius     * The code point is used for fallbacks, context and implicit weights.
218fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius     * It is ignored when the returned CE32 is not special (e.g., FFFD_CE32).
219fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius     */
220fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius    virtual uint32_t handleNextCE32(UChar32 &c, UErrorCode &errorCode);
221fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius
222fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius    /**
223fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius     * Called when handleNextCE32() returns a LEAD_SURROGATE_TAG for a lead surrogate code unit.
224fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius     * Returns the trail surrogate in that case and advances past it,
225fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius     * if a trail surrogate follows the lead surrogate.
226fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius     * Otherwise returns any other code unit and does not advance.
227fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius     */
228fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius    virtual UChar handleGetTrailSurrogate();
229fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius
230fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius    /**
231fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius     * Called when handleNextCE32() returns with c==0, to see whether it is a NUL terminator.
232fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius     * (Not needed in Java.)
233fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius     */
234fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius    virtual UBool foundNULTerminator();
235fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius
236fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius    /**
237fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius     * @return FALSE if surrogate code points U+D800..U+DFFF
238fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius     *         map to their own implicit primary weights (for UTF-16),
239fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius     *         or TRUE if they map to CE(U+FFFD) (for UTF-8)
240fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius     */
241fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius    virtual UBool forbidSurrogateCodePoints() const;
242fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius
243fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius    virtual void forwardNumCodePoints(int32_t num, UErrorCode &errorCode) = 0;
244fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius
245fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius    virtual void backwardNumCodePoints(int32_t num, UErrorCode &errorCode) = 0;
246fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius
247fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius    /**
248fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius     * Returns the CE32 from the data trie.
249fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius     * Normally the same as data->getCE32(), but overridden in the builder.
250fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius     * Call this only when the faster data->getCE32() cannot be used.
251fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius     */
252fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius    virtual uint32_t getDataCE32(UChar32 c) const;
253fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius
254fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius    virtual uint32_t getCE32FromBuilderData(uint32_t ce32, UErrorCode &errorCode);
255fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius
256fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius    void appendCEsFromCE32(const CollationData *d, UChar32 c, uint32_t ce32,
257fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius                           UBool forward, UErrorCode &errorCode);
258fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius
259fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius    // Main lookup trie of the data object.
260fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius    const UTrie2 *trie;
261fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius    const CollationData *data;
262fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius
263fceb39872958b9fa2505e63f8b8699a9e0f882f4ccorneliusprivate:
264fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius    int64_t nextCEFromCE32(const CollationData *d, UChar32 c, uint32_t ce32,
265fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius                           UErrorCode &errorCode);
266fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius
267fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius    uint32_t getCE32FromPrefix(const CollationData *d, uint32_t ce32,
268fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius                               UErrorCode &errorCode);
269fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius
270fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius    UChar32 nextSkippedCodePoint(UErrorCode &errorCode);
271fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius
272fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius    void backwardNumSkipped(int32_t n, UErrorCode &errorCode);
273fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius
274fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius    uint32_t nextCE32FromContraction(
275fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius            const CollationData *d, uint32_t contractionCE32,
276fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius            const UChar *p, uint32_t ce32, UChar32 c,
277fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius            UErrorCode &errorCode);
278fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius
279fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius    uint32_t nextCE32FromDiscontiguousContraction(
280fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius            const CollationData *d, UCharsTrie &suffixes, uint32_t ce32,
281fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius            int32_t lookAhead, UChar32 c,
282fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius            UErrorCode &errorCode);
283fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius
284fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius    /**
285fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius     * Returns the previous CE when data->isUnsafeBackward(c, isNumeric).
286fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius     */
287fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius    int64_t previousCEUnsafe(UChar32 c, UVector32 &offsets, UErrorCode &errorCode);
288fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius
289fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius    /**
290fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius     * Turns a string of digits (bytes 0..9)
291fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius     * into a sequence of CEs that will sort in numeric order.
292fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius     *
293fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius     * Starts from this ce32's digit value and consumes the following/preceding digits.
294fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius     * The digits string must not be empty and must not have leading zeros.
295fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius     */
296fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius    void appendNumericCEs(uint32_t ce32, UBool forward, UErrorCode &errorCode);
297fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius
298fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius    /**
299fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius     * Turns 1..254 digits into a sequence of CEs.
300fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius     * Called by appendNumericCEs() for each segment of at most 254 digits.
301fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius     */
302fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius    void appendNumericSegmentCEs(const char *digits, int32_t length, UErrorCode &errorCode);
303fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius
304fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius    CEBuffer ceBuffer;
305fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius    int32_t cesIndex;
306fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius
307fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius    SkippedState *skipped;
308fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius
309fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius    // Number of code points to read forward, or -1.
310fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius    // Used as a forward iteration limit in previousCEUnsafe().
311fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius    int32_t numCpFwd;
312fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius    // Numeric collation (CollationSettings::NUMERIC).
313fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius    UBool isNumeric;
314fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius};
315fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius
316fceb39872958b9fa2505e63f8b8699a9e0f882f4ccorneliusU_NAMESPACE_END
317fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius
318fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius#endif  // !UCONFIG_NO_COLLATION
319fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius#endif  // __COLLATIONITERATOR_H__
320