1b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru/*
2b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru*******************************************************************************
3fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius* Copyright (C) 1996-2014, International Business Machines Corporation and
4fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius* others. All Rights Reserved.
5b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru*******************************************************************************
6b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru*/
7b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
8b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru/*
9b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru* File coleitr.cpp
10b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru*
11b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru* Created by: Helena Shih
12b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru*
13b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru* Modification History:
14b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru*
15b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru*  Date      Name        Description
16b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru*
17b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru*  6/23/97   helena      Adding comments to make code more readable.
18b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru* 08/03/98   erm         Synched with 1.2 version of CollationElementIterator.java
19b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru* 12/10/99   aliu        Ported Thai collation support from Java.
20b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru* 01/25/01   swquek      Modified to a C++ wrapper calling C APIs (ucoliter.h)
21fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius* 02/19/01   swquek      Removed CollationElementIterator() since it is
22b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru*                        private constructor and no calls are made to it
23fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius* 2012-2014  markus      Rewritten in C++ again.
24b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru*/
25b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
26b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru#include "unicode/utypes.h"
27b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
28b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru#if !UCONFIG_NO_COLLATION
29b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
30b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru#include "unicode/coleitr.h"
31fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius#include "unicode/tblcoll.h"
32b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru#include "unicode/ustring.h"
33b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru#include "cmemory.h"
34fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius#include "collation.h"
35fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius#include "collationdata.h"
36fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius#include "collationiterator.h"
37fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius#include "collationsets.h"
38fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius#include "collationtailoring.h"
39fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius#include "uassert.h"
40fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius#include "uhash.h"
41fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius#include "utf16collationiterator.h"
42fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius#include "uvectr32.h"
43b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
44b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru/* Constants --------------------------------------------------------------- */
45b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
46b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste QueruU_NAMESPACE_BEGIN
47b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
48b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste QueruUOBJECT_DEFINE_RTTI_IMPLEMENTATION(CollationElementIterator)
49b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
50b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru/* CollationElementIterator public constructor/destructor ------------------ */
51b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
52b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste QueruCollationElementIterator::CollationElementIterator(
53b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                                         const CollationElementIterator& other)
54fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius        : UObject(other), iter_(NULL), rbc_(NULL), otherHalf_(0), dir_(0), offsets_(NULL) {
55b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    *this = other;
56b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru}
57b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
58b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste QueruCollationElementIterator::~CollationElementIterator()
59b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru{
60fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius    delete iter_;
61fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius    delete offsets_;
62b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru}
63b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
64b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru/* CollationElementIterator public methods --------------------------------- */
65b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
namespace {

/**
* Builds the first 32-bit value from the two halves of a 64-bit CE:
* bits 31..16 of p, bits 31..24 of lower32 moved to bits 15..8,
* and bits 15..8 of lower32 moved to bits 7..0.
*/
uint32_t getFirstHalf(uint32_t p, uint32_t lower32) {
    const uint32_t topBits = p & 0xffff0000;
    const uint32_t middleByte = (lower32 >> 16) & 0xff00;
    const uint32_t bottomByte = (lower32 >> 8) & 0xff;
    return topBits | middleByte | bottomByte;
}

/**
* Builds the second 32-bit value from the two halves of a 64-bit CE:
* bits 15..0 of p moved to bits 31..16, bits 23..16 of lower32 moved to
* bits 15..8, and bits 5..0 of lower32 kept in place.
*/
uint32_t getSecondHalf(uint32_t p, uint32_t lower32) {
    const uint32_t topBits = p << 16;
    const uint32_t middleByte = (lower32 >> 8) & 0xff00;
    const uint32_t bottomBits = lower32 & 0x3f;
    return topBits | middleByte | bottomBits;
}

/**
* Tells whether any of the CE bits that getSecondHalf() reads are set,
* that is, whether the second half of this CE would be non-zero.
*/
UBool ceNeedsTwoParts(int64_t ce) {
    const int64_t secondHalfBits = INT64_C(0xffff00ff003f);
    return (ce & secondHalfBits) != 0;
}

}  // namespace
79fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius
80b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queruint32_t CollationElementIterator::getOffset() const
81b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru{
82fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius    if (dir_ < 0 && offsets_ != NULL && !offsets_->isEmpty()) {
83fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius        // CollationIterator::previousCE() decrements the CEs length
84fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius        // while it pops CEs from its internal buffer.
85fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius        int32_t i = iter_->getCEsLength();
86fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius        if (otherHalf_ != 0) {
87fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius            // Return the trailing CE offset while we are in the middle of a 64-bit CE.
88fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius            ++i;
89fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius        }
90fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius        U_ASSERT(i < offsets_->size());
91fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius        return offsets_->elementAti(i);
92fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius    }
93fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius    return iter_->getOffset();
94b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru}
95b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
96b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru/**
97b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru* Get the ordering priority of the next character in the string.
98b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru* @return the next character's ordering. Returns NULLORDER if an error has
99b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru*         occured or if the end of string has been reached
100b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru*/
101b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queruint32_t CollationElementIterator::next(UErrorCode& status)
102b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru{
103fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius    if (U_FAILURE(status)) { return NULLORDER; }
104fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius    if (dir_ > 1) {
105fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius        // Continue forward iteration. Test this first.
106fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius        if (otherHalf_ != 0) {
107fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius            uint32_t oh = otherHalf_;
108fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius            otherHalf_ = 0;
109fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius            return oh;
110fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius        }
111fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius    } else if (dir_ == 1) {
112fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius        // next() after setOffset()
113fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius        dir_ = 2;
114fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius    } else if (dir_ == 0) {
115fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius        // The iter_ is already reset to the start of the text.
116fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius        dir_ = 2;
117fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius    } else /* dir_ < 0 */ {
118fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius        // illegal change of direction
119fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius        status = U_INVALID_STATE_ERROR;
120fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius        return NULLORDER;
121fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius    }
122fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius    // No need to keep all CEs in the buffer when we iterate.
123fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius    iter_->clearCEsIfNoneRemaining();
124fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius    int64_t ce = iter_->nextCE(status);
125fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius    if (ce == Collation::NO_CE) { return NULLORDER; }
126fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius    // Turn the 64-bit CE into two old-style 32-bit CEs, without quaternary bits.
127fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius    uint32_t p = (uint32_t)(ce >> 32);
128fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius    uint32_t lower32 = (uint32_t)ce;
129fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius    uint32_t firstHalf = getFirstHalf(p, lower32);
130fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius    uint32_t secondHalf = getSecondHalf(p, lower32);
131fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius    if (secondHalf != 0) {
132fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius        otherHalf_ = secondHalf | 0xc0;  // continuation CE
133fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius    }
134fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius    return firstHalf;
135b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru}
136b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
137b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste QueruUBool CollationElementIterator::operator!=(
138b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                                  const CollationElementIterator& other) const
139b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru{
140b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    return !(*this == other);
141b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru}
142b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
143b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste QueruUBool CollationElementIterator::operator==(
144b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                                    const CollationElementIterator& that) const
145b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru{
146fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius    if (this == &that) {
147b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        return TRUE;
148b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    }
149b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
150fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius    return
151fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius        (rbc_ == that.rbc_ || *rbc_ == *that.rbc_) &&
152fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius        otherHalf_ == that.otherHalf_ &&
153fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius        normalizeDir() == that.normalizeDir() &&
154fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius        string_ == that.string_ &&
155fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius        *iter_ == *that.iter_;
156b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru}
157b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
158b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru/**
159b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru* Get the ordering priority of the previous collation element in the string.
160b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru* @param status the error code status.
161b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru* @return the previous element's ordering. Returns NULLORDER if an error has
162b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru*         occured or if the start of string has been reached.
163b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru*/
164b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queruint32_t CollationElementIterator::previous(UErrorCode& status)
165b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru{
166fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius    if (U_FAILURE(status)) { return NULLORDER; }
167fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius    if (dir_ < 0) {
168fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius        // Continue backwards iteration. Test this first.
169fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius        if (otherHalf_ != 0) {
170fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius            uint32_t oh = otherHalf_;
171fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius            otherHalf_ = 0;
172fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius            return oh;
173fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius        }
174fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius    } else if (dir_ == 0) {
175fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius        iter_->resetToOffset(string_.length());
176fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius        dir_ = -1;
177fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius    } else if (dir_ == 1) {
178fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius        // previous() after setOffset()
179fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius        dir_ = -1;
180fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius    } else /* dir_ > 1 */ {
181fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius        // illegal change of direction
182fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius        status = U_INVALID_STATE_ERROR;
183fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius        return NULLORDER;
184fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius    }
185fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius    if (offsets_ == NULL) {
186fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius        offsets_ = new UVector32(status);
187fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius        if (offsets_ == NULL) {
188fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius            status = U_MEMORY_ALLOCATION_ERROR;
189fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius            return NULLORDER;
190fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius        }
191fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius    }
192fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius    // If we already have expansion CEs, then we also have offsets.
193fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius    // Otherwise remember the trailing offset in case we need to
194fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius    // write offsets for an artificial expansion.
195fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius    int32_t limitOffset = iter_->getCEsLength() == 0 ? iter_->getOffset() : 0;
196fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius    int64_t ce = iter_->previousCE(*offsets_, status);
197fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius    if (ce == Collation::NO_CE) { return NULLORDER; }
198fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius    // Turn the 64-bit CE into two old-style 32-bit CEs, without quaternary bits.
199fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius    uint32_t p = (uint32_t)(ce >> 32);
200fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius    uint32_t lower32 = (uint32_t)ce;
201fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius    uint32_t firstHalf = getFirstHalf(p, lower32);
202fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius    uint32_t secondHalf = getSecondHalf(p, lower32);
203fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius    if (secondHalf != 0) {
204fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius        if (offsets_->isEmpty()) {
205fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius            // When we convert a single 64-bit CE into two 32-bit CEs,
206fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius            // we need to make this artificial expansion behave like a normal expansion.
207fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius            // See CollationIterator::previousCE().
208fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius            offsets_->addElement(iter_->getOffset(), status);
209fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius            offsets_->addElement(limitOffset, status);
210fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius        }
211fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius        otherHalf_ = firstHalf;
212fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius        return secondHalf | 0xc0;  // continuation CE
213fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius    }
214fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius    return firstHalf;
215b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru}
216b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
217b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru/**
218b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru* Resets the cursor to the beginning of the string.
219b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru*/
220b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queruvoid CollationElementIterator::reset()
221b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru{
222fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius    iter_ ->resetToOffset(0);
223fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius    otherHalf_ = 0;
224fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius    dir_ = 0;
225b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru}
226b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
227b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queruvoid CollationElementIterator::setOffset(int32_t newOffset,
228b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                                         UErrorCode& status)
229b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru{
230fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius    if (U_FAILURE(status)) { return; }
231fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius    if (0 < newOffset && newOffset < string_.length()) {
232fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius        int32_t offset = newOffset;
233fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius        do {
234fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius            UChar c = string_.charAt(offset);
235fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius            if (!rbc_->isUnsafe(c) ||
236fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius                    (U16_IS_LEAD(c) && !rbc_->isUnsafe(string_.char32At(offset)))) {
237fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius                break;
238fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius            }
239fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius            // Back up to before this unsafe character.
240fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius            --offset;
241fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius        } while (offset > 0);
242fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius        if (offset < newOffset) {
243fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius            // We might have backed up more than necessary.
244fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius            // For example, contractions "ch" and "cu" make both 'h' and 'u' unsafe,
245fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius            // but for text "chu" setOffset(2) should remain at 2
246fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius            // although we initially back up to offset 0.
247fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius            // Find the last safe offset no greater than newOffset by iterating forward.
248fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius            int32_t lastSafeOffset = offset;
249fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius            do {
250fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius                iter_->resetToOffset(lastSafeOffset);
251fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius                do {
252fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius                    iter_->nextCE(status);
253fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius                    if (U_FAILURE(status)) { return; }
254fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius                } while ((offset = iter_->getOffset()) == lastSafeOffset);
255fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius                if (offset <= newOffset) {
256fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius                    lastSafeOffset = offset;
257fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius                }
258fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius            } while (offset < newOffset);
259fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius            newOffset = lastSafeOffset;
260fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius        }
261fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius    }
262fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius    iter_->resetToOffset(newOffset);
263fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius    otherHalf_ = 0;
264fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius    dir_ = 1;
265b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru}
266b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
267b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru/**
268b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru* Sets the source to the new source string.
269b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru*/
270b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queruvoid CollationElementIterator::setText(const UnicodeString& source,
271b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                                       UErrorCode& status)
272b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru{
273b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    if (U_FAILURE(status)) {
274b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        return;
275b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    }
276b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
277fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius    string_ = source;
278fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius    const UChar *s = string_.getBuffer();
279fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius    CollationIterator *newIter;
280fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius    UBool numeric = rbc_->settings->isNumeric();
281fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius    if (rbc_->settings->dontCheckFCD()) {
282fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius        newIter = new UTF16CollationIterator(rbc_->data, numeric, s, s, s + string_.length());
283fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius    } else {
284fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius        newIter = new FCDUTF16CollationIterator(rbc_->data, numeric, s, s, s + string_.length());
285b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    }
286fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius    if (newIter == NULL) {
287fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius        status = U_MEMORY_ALLOCATION_ERROR;
288fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius        return;
289b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    }
290fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius    delete iter_;
291fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius    iter_ = newIter;
292fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius    otherHalf_ = 0;
293fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius    dir_ = 0;
294b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru}
295b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
296b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru// Sets the source to the new character iterator.
297b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queruvoid CollationElementIterator::setText(CharacterIterator& source,
298b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                                       UErrorCode& status)
299b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru{
300b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    if (U_FAILURE(status))
301b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        return;
302b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
303fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius    source.getText(string_);
304fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius    setText(string_, status);
305b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru}
306b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
307b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queruint32_t CollationElementIterator::strengthOrder(int32_t order) const
308b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru{
309fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius    UColAttributeValue s = (UColAttributeValue)rbc_->settings->getStrength();
310b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    // Mask off the unwanted differences.
311b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    if (s == UCOL_PRIMARY) {
312fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius        order &= 0xffff0000;
313b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    }
314b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    else if (s == UCOL_SECONDARY) {
315fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius        order &= 0xffffff00;
316b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    }
317b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
318b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    return order;
319b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru}
320b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
321b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru/* CollationElementIterator private constructors/destructors --------------- */
322b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
323b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru/**
324b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru* This is the "real" constructor for this class; it constructs an iterator
325b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru* over the source text using the specified collator
326b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru*/
327b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste QueruCollationElementIterator::CollationElementIterator(
328fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius                                               const UnicodeString &source,
329fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius                                               const RuleBasedCollator *coll,
330fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius                                               UErrorCode &status)
331fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius        : iter_(NULL), rbc_(coll), otherHalf_(0), dir_(0), offsets_(NULL) {
332fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius    setText(source, status);
333b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru}
334b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
335b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru/**
336b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru* This is the "real" constructor for this class; it constructs an iterator over
337b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru* the source text using the specified collator
338b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru*/
339b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste QueruCollationElementIterator::CollationElementIterator(
340fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius                                           const CharacterIterator &source,
341fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius                                           const RuleBasedCollator *coll,
342fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius                                           UErrorCode &status)
343fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius        : iter_(NULL), rbc_(coll), otherHalf_(0), dir_(0), offsets_(NULL) {
344fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius    // We only call source.getText() which should be const anyway.
345fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius    setText(const_cast<CharacterIterator &>(source), status);
346b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru}
347b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
348fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius/* CollationElementIterator private methods -------------------------------- */
349b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
350b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queruconst CollationElementIterator& CollationElementIterator::operator=(
351b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru                                         const CollationElementIterator& other)
352b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru{
353fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius    if (this == &other) {
354fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius        return *this;
355fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius    }
356fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius
357fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius    CollationIterator *newIter;
358fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius    const FCDUTF16CollationIterator *otherFCDIter =
359fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius            dynamic_cast<const FCDUTF16CollationIterator *>(other.iter_);
360fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius    if(otherFCDIter != NULL) {
361fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius        newIter = new FCDUTF16CollationIterator(*otherFCDIter, string_.getBuffer());
362fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius    } else {
363fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius        const UTF16CollationIterator *otherIter =
364fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius                dynamic_cast<const UTF16CollationIterator *>(other.iter_);
365fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius        if(otherIter != NULL) {
366fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius            newIter = new UTF16CollationIterator(*otherIter, string_.getBuffer());
367fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius        } else {
368fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius            newIter = NULL;
369b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        }
370fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius    }
371fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius    if(newIter != NULL) {
372fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius        delete iter_;
373fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius        iter_ = newIter;
374fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius        rbc_ = other.rbc_;
375fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius        otherHalf_ = other.otherHalf_;
376fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius        dir_ = other.dir_;
377fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius
378fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius        string_ = other.string_;
379fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius    }
380fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius    if(other.dir_ < 0 && other.offsets_ != NULL && !other.offsets_->isEmpty()) {
381fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius        UErrorCode errorCode = U_ZERO_ERROR;
382fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius        if(offsets_ == NULL) {
383fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius            offsets_ = new UVector32(other.offsets_->size(), errorCode);
384b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        }
385fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius        if(offsets_ != NULL) {
386fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius            offsets_->assign(*other.offsets_, errorCode);
387b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        }
388fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius    }
389fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius    return *this;
390fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius}
391fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius
392fceb39872958b9fa2505e63f8b8699a9e0f882f4ccorneliusnamespace {
393b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
394fceb39872958b9fa2505e63f8b8699a9e0f882f4ccorneliusclass MaxExpSink : public ContractionsAndExpansions::CESink {
395fceb39872958b9fa2505e63f8b8699a9e0f882f4ccorneliuspublic:
396fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius    MaxExpSink(UHashtable *h, UErrorCode &ec) : maxExpansions(h), errorCode(ec) {}
397fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius    virtual ~MaxExpSink();
398fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius    virtual void handleCE(int64_t /*ce*/) {}
399fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius    virtual void handleExpansion(const int64_t ces[], int32_t length) {
400fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius        if (length <= 1) {
401fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius            // We do not need to add single CEs into the map.
402fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius            return;
403b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        }
404fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius        int32_t count = 0;  // number of CE "halves"
405fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius        for (int32_t i = 0; i < length; ++i) {
406fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius            count += ceNeedsTwoParts(ces[i]) ? 2 : 1;
407b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        }
408fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius        // last "half" of the last CE
409fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius        int64_t ce = ces[length - 1];
410fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius        uint32_t p = (uint32_t)(ce >> 32);
411fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius        uint32_t lower32 = (uint32_t)ce;
412fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius        uint32_t lastHalf = getSecondHalf(p, lower32);
413fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius        if (lastHalf == 0) {
414fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius            lastHalf = getFirstHalf(p, lower32);
415fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius            U_ASSERT(lastHalf != 0);
416c69afcec261fc345fda8daf46f0ea6b4351dc777Jean-Baptiste Queru        } else {
417fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius            lastHalf |= 0xc0;  // old-style continuation CE
418b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        }
419fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius        if (count > uhash_igeti(maxExpansions, (int32_t)lastHalf)) {
420fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius            uhash_iputi(maxExpansions, (int32_t)lastHalf, count, &errorCode);
421b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru        }
422b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru    }
423b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
424fceb39872958b9fa2505e63f8b8699a9e0f882f4ccorneliusprivate:
425fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius    UHashtable *maxExpansions;
426fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius    UErrorCode &errorCode;
427fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius};
428fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius
429fceb39872958b9fa2505e63f8b8699a9e0f882f4ccorneliusMaxExpSink::~MaxExpSink() {}
430fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius
431fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius}  // namespace
432fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius
433fceb39872958b9fa2505e63f8b8699a9e0f882f4ccorneliusUHashtable *
434fceb39872958b9fa2505e63f8b8699a9e0f882f4ccorneliusCollationElementIterator::computeMaxExpansions(const CollationData *data, UErrorCode &errorCode) {
435fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius    if (U_FAILURE(errorCode)) { return NULL; }
436fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius    UHashtable *maxExpansions = uhash_open(uhash_hashLong, uhash_compareLong,
437fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius                                           uhash_compareLong, &errorCode);
438fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius    if (U_FAILURE(errorCode)) { return NULL; }
439fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius    MaxExpSink sink(maxExpansions, errorCode);
440fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius    ContractionsAndExpansions(NULL, NULL, &sink, TRUE).forData(data, errorCode);
441fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius    if (U_FAILURE(errorCode)) {
442fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius        uhash_close(maxExpansions);
443fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius        return NULL;
444fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius    }
445fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius    return maxExpansions;
446fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius}
447fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius
448fceb39872958b9fa2505e63f8b8699a9e0f882f4ccorneliusint32_t
449fceb39872958b9fa2505e63f8b8699a9e0f882f4ccorneliusCollationElementIterator::getMaxExpansion(int32_t order) const {
450fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius    return getMaxExpansion(rbc_->tailoring->maxExpansions, order);
451fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius}
452fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius
453fceb39872958b9fa2505e63f8b8699a9e0f882f4ccorneliusint32_t
454fceb39872958b9fa2505e63f8b8699a9e0f882f4ccorneliusCollationElementIterator::getMaxExpansion(const UHashtable *maxExpansions, int32_t order) {
455fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius    if (order == 0) { return 1; }
456fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius    int32_t max;
457fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius    if(maxExpansions != NULL && (max = uhash_igeti(maxExpansions, order)) != 0) {
458fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius        return max;
459fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius    }
460fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius    if ((order & 0xc0) == 0xc0) {
461fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius        // old-style continuation CE
462fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius        return 2;
463fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius    } else {
464fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius        return 1;
465fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius    }
466b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru}
467b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
468b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste QueruU_NAMESPACE_END
469b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru
470b13da9df870a61b11249bf741347908dbea0edd8Jean-Baptiste Queru#endif /* #if !UCONFIG_NO_COLLATION */
471