// © 2016 and later: Unicode, Inc. and others.
// License & terms of use: http://www.unicode.org/copyright.html
/*
*******************************************************************************
* Copyright (C) 2013-2014, International Business Machines
* Corporation and others.  All Rights Reserved.
*******************************************************************************
* collationsets.cpp
*
* created on: 2013feb09
* created by: Markus W. Scherer
*/
13fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius
14fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius#include "unicode/utypes.h"
15fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius
16fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius#if !UCONFIG_NO_COLLATION
17fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius
18fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius#include "unicode/ucharstrie.h"
19fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius#include "unicode/uniset.h"
20fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius#include "unicode/unistr.h"
21fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius#include "unicode/ustringtrie.h"
22fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius#include "collation.h"
23fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius#include "collationdata.h"
24fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius#include "collationsets.h"
25fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius#include "normalizer2impl.h"
26fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius#include "uassert.h"
27fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius#include "utf16collationiterator.h"
28fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius#include "utrie2.h"
29fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius
30fceb39872958b9fa2505e63f8b8699a9e0f882f4ccorneliusU_NAMESPACE_BEGIN
31fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius
32fceb39872958b9fa2505e63f8b8699a9e0f882f4ccorneliusU_CDECL_BEGIN
33fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius
34fceb39872958b9fa2505e63f8b8699a9e0f882f4ccorneliusstatic UBool U_CALLCONV
35fceb39872958b9fa2505e63f8b8699a9e0f882f4ccorneliusenumTailoredRange(const void *context, UChar32 start, UChar32 end, uint32_t ce32) {
36fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius    if(ce32 == Collation::FALLBACK_CE32) {
37fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius        return TRUE;  // fallback to base, not tailored
38fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius    }
39fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius    TailoredSet *ts = (TailoredSet *)context;
40fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius    return ts->handleCE32(start, end, ce32);
41fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius}
42fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius
43fceb39872958b9fa2505e63f8b8699a9e0f882f4ccorneliusU_CDECL_END
44fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius
45fceb39872958b9fa2505e63f8b8699a9e0f882f4ccorneliusvoid
46fceb39872958b9fa2505e63f8b8699a9e0f882f4ccorneliusTailoredSet::forData(const CollationData *d, UErrorCode &ec) {
47fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius    if(U_FAILURE(ec)) { return; }
48fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius    errorCode = ec;  // Preserve info & warning codes.
49fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius    data = d;
50fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius    baseData = d->base;
51fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius    U_ASSERT(baseData != NULL);
52fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius    utrie2_enum(data->trie, NULL, enumTailoredRange, this);
53fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius    ec = errorCode;
54fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius}
55fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius
56fceb39872958b9fa2505e63f8b8699a9e0f882f4ccorneliusUBool
57fceb39872958b9fa2505e63f8b8699a9e0f882f4ccorneliusTailoredSet::handleCE32(UChar32 start, UChar32 end, uint32_t ce32) {
58fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius    U_ASSERT(ce32 != Collation::FALLBACK_CE32);
59fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius    if(Collation::isSpecialCE32(ce32)) {
60fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius        ce32 = data->getIndirectCE32(ce32);
61fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius        if(ce32 == Collation::FALLBACK_CE32) {
62fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius            return U_SUCCESS(errorCode);
63fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius        }
64fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius    }
65fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius    do {
66fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius        uint32_t baseCE32 = baseData->getFinalCE32(baseData->getCE32(start));
67fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius        // Do not just continue if ce32 == baseCE32 because
68fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius        // contractions and expansions in different data objects
69fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius        // normally differ even if they have the same data offsets.
70fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius        if(Collation::isSelfContainedCE32(ce32) && Collation::isSelfContainedCE32(baseCE32)) {
71fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius            // fastpath
72fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius            if(ce32 != baseCE32) {
73fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius                tailored->add(start);
74fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius            }
75fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius        } else {
76fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius            compare(start, ce32, baseCE32);
77fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius        }
78fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius    } while(++start <= end);
79fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius    return U_SUCCESS(errorCode);
80fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius}
81fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius
82fceb39872958b9fa2505e63f8b8699a9e0f882f4ccorneliusvoid
83fceb39872958b9fa2505e63f8b8699a9e0f882f4ccorneliusTailoredSet::compare(UChar32 c, uint32_t ce32, uint32_t baseCE32) {
84fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius    if(Collation::isPrefixCE32(ce32)) {
85fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius        const UChar *p = data->contexts + Collation::indexFromCE32(ce32);
86fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius        ce32 = data->getFinalCE32(CollationData::readCE32(p));
87fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius        if(Collation::isPrefixCE32(baseCE32)) {
88fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius            const UChar *q = baseData->contexts + Collation::indexFromCE32(baseCE32);
89fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius            baseCE32 = baseData->getFinalCE32(CollationData::readCE32(q));
90fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius            comparePrefixes(c, p + 2, q + 2);
91fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius        } else {
92fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius            addPrefixes(data, c, p + 2);
93fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius        }
94fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius    } else if(Collation::isPrefixCE32(baseCE32)) {
95fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius        const UChar *q = baseData->contexts + Collation::indexFromCE32(baseCE32);
96fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius        baseCE32 = baseData->getFinalCE32(CollationData::readCE32(q));
97fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius        addPrefixes(baseData, c, q + 2);
98fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius    }
99fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius
100fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius    if(Collation::isContractionCE32(ce32)) {
101fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius        const UChar *p = data->contexts + Collation::indexFromCE32(ce32);
102fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius        if((ce32 & Collation::CONTRACT_SINGLE_CP_NO_MATCH) != 0) {
103fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius            ce32 = Collation::NO_CE32;
104fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius        } else {
105fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius            ce32 = data->getFinalCE32(CollationData::readCE32(p));
106fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius        }
107fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius        if(Collation::isContractionCE32(baseCE32)) {
108fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius            const UChar *q = baseData->contexts + Collation::indexFromCE32(baseCE32);
109fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius            if((baseCE32 & Collation::CONTRACT_SINGLE_CP_NO_MATCH) != 0) {
110fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius                baseCE32 = Collation::NO_CE32;
111fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius            } else {
112fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius                baseCE32 = baseData->getFinalCE32(CollationData::readCE32(q));
113fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius            }
114fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius            compareContractions(c, p + 2, q + 2);
115fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius        } else {
116fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius            addContractions(c, p + 2);
117fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius        }
118fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius    } else if(Collation::isContractionCE32(baseCE32)) {
119fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius        const UChar *q = baseData->contexts + Collation::indexFromCE32(baseCE32);
120fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius        baseCE32 = baseData->getFinalCE32(CollationData::readCE32(q));
121fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius        addContractions(c, q + 2);
122fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius    }
123fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius
124fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius    int32_t tag;
125fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius    if(Collation::isSpecialCE32(ce32)) {
126fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius        tag = Collation::tagFromCE32(ce32);
127fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius        U_ASSERT(tag != Collation::PREFIX_TAG);
128fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius        U_ASSERT(tag != Collation::CONTRACTION_TAG);
129fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius        // Currently, the tailoring data builder does not write offset tags.
130fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius        // They might be useful for saving space,
131fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius        // but they would complicate the builder,
132fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius        // and in tailorings we assume that performance of tailored characters is more important.
133fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius        U_ASSERT(tag != Collation::OFFSET_TAG);
134fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius    } else {
135fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius        tag = -1;
136fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius    }
137fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius    int32_t baseTag;
138fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius    if(Collation::isSpecialCE32(baseCE32)) {
139fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius        baseTag = Collation::tagFromCE32(baseCE32);
140fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius        U_ASSERT(baseTag != Collation::PREFIX_TAG);
141fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius        U_ASSERT(baseTag != Collation::CONTRACTION_TAG);
142fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius    } else {
143fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius        baseTag = -1;
144fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius    }
145fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius
146fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius    // Non-contextual mappings, expansions, etc.
147fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius    if(baseTag == Collation::OFFSET_TAG) {
148fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius        // We might be comparing a tailoring CE which is a copy of
149fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius        // a base offset-tag CE, via the [optimize [set]] syntax
150fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius        // or when a single-character mapping was copied for tailored contractions.
151fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius        // Offset tags always result in long-primary CEs,
152fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius        // with common secondary/tertiary weights.
153fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius        if(!Collation::isLongPrimaryCE32(ce32)) {
154fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius            add(c);
155fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius            return;
156fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius        }
157fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius        int64_t dataCE = baseData->ces[Collation::indexFromCE32(baseCE32)];
158fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius        uint32_t p = Collation::getThreeBytePrimaryForOffsetData(c, dataCE);
159fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius        if(Collation::primaryFromLongPrimaryCE32(ce32) != p) {
160fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius            add(c);
161fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius            return;
162fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius        }
163fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius    }
164fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius
165fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius    if(tag != baseTag) {
166fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius        add(c);
167fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius        return;
168fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius    }
169fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius
170fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius    if(tag == Collation::EXPANSION32_TAG) {
171fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius        const uint32_t *ce32s = data->ce32s + Collation::indexFromCE32(ce32);
172fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius        int32_t length = Collation::lengthFromCE32(ce32);
173fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius
174fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius        const uint32_t *baseCE32s = baseData->ce32s + Collation::indexFromCE32(baseCE32);
175fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius        int32_t baseLength = Collation::lengthFromCE32(baseCE32);
176fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius
177fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius        if(length != baseLength) {
178fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius            add(c);
179fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius            return;
180fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius        }
181fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius        for(int32_t i = 0; i < length; ++i) {
182fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius            if(ce32s[i] != baseCE32s[i]) {
183fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius                add(c);
184fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius                break;
185fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius            }
186fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius        }
187fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius    } else if(tag == Collation::EXPANSION_TAG) {
188fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius        const int64_t *ces = data->ces + Collation::indexFromCE32(ce32);
189fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius        int32_t length = Collation::lengthFromCE32(ce32);
190fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius
191fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius        const int64_t *baseCEs = baseData->ces + Collation::indexFromCE32(baseCE32);
192fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius        int32_t baseLength = Collation::lengthFromCE32(baseCE32);
193fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius
194fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius        if(length != baseLength) {
195fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius            add(c);
196fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius            return;
197fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius        }
198fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius        for(int32_t i = 0; i < length; ++i) {
199fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius            if(ces[i] != baseCEs[i]) {
200fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius                add(c);
201fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius                break;
202fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius            }
203fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius        }
204fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius    } else if(tag == Collation::HANGUL_TAG) {
205fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius        UChar jamos[3];
206fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius        int32_t length = Hangul::decompose(c, jamos);
207fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius        if(tailored->contains(jamos[0]) || tailored->contains(jamos[1]) ||
208fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius                (length == 3 && tailored->contains(jamos[2]))) {
209fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius            add(c);
210fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius        }
211fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius    } else if(ce32 != baseCE32) {
212fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius        add(c);
213fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius    }
214fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius}
215fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius
216fceb39872958b9fa2505e63f8b8699a9e0f882f4ccorneliusvoid
217fceb39872958b9fa2505e63f8b8699a9e0f882f4ccorneliusTailoredSet::comparePrefixes(UChar32 c, const UChar *p, const UChar *q) {
218fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius    // Parallel iteration over prefixes of both tables.
219fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius    UCharsTrie::Iterator prefixes(p, 0, errorCode);
220fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius    UCharsTrie::Iterator basePrefixes(q, 0, errorCode);
221fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius    const UnicodeString *tp = NULL;  // Tailoring prefix.
222fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius    const UnicodeString *bp = NULL;  // Base prefix.
223fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius    // Use a string with a U+FFFF as the limit sentinel.
224fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius    // U+FFFF is untailorable and will not occur in prefixes.
225fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius    UnicodeString none((UChar)0xffff);
226fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius    for(;;) {
227fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius        if(tp == NULL) {
228fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius            if(prefixes.next(errorCode)) {
229fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius                tp = &prefixes.getString();
230fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius            } else {
231fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius                tp = &none;
232fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius            }
233fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius        }
234fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius        if(bp == NULL) {
235fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius            if(basePrefixes.next(errorCode)) {
236fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius                bp = &basePrefixes.getString();
237fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius            } else {
238fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius                bp = &none;
239fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius            }
240fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius        }
241fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius        if(tp == &none && bp == &none) { break; }
242fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius        int32_t cmp = tp->compare(*bp);
243fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius        if(cmp < 0) {
244fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius            // tp occurs in the tailoring but not in the base.
245fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius            addPrefix(data, *tp, c, (uint32_t)prefixes.getValue());
246fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius            tp = NULL;
247fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius        } else if(cmp > 0) {
248fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius            // bp occurs in the base but not in the tailoring.
249fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius            addPrefix(baseData, *bp, c, (uint32_t)basePrefixes.getValue());
250fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius            bp = NULL;
251fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius        } else {
252fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius            setPrefix(*tp);
253fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius            compare(c, (uint32_t)prefixes.getValue(), (uint32_t)basePrefixes.getValue());
254fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius            resetPrefix();
255fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius            tp = NULL;
256fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius            bp = NULL;
257fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius        }
258fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius    }
259fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius}
260fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius
261fceb39872958b9fa2505e63f8b8699a9e0f882f4ccorneliusvoid
262fceb39872958b9fa2505e63f8b8699a9e0f882f4ccorneliusTailoredSet::compareContractions(UChar32 c, const UChar *p, const UChar *q) {
263fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius    // Parallel iteration over suffixes of both tables.
264fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius    UCharsTrie::Iterator suffixes(p, 0, errorCode);
265fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius    UCharsTrie::Iterator baseSuffixes(q, 0, errorCode);
266fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius    const UnicodeString *ts = NULL;  // Tailoring suffix.
267fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius    const UnicodeString *bs = NULL;  // Base suffix.
268fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius    // Use a string with two U+FFFF as the limit sentinel.
269fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius    // U+FFFF is untailorable and will not occur in contractions except maybe
270fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius    // as a single suffix character for a root-collator boundary contraction.
271fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius    UnicodeString none((UChar)0xffff);
272fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius    none.append((UChar)0xffff);
273fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius    for(;;) {
274fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius        if(ts == NULL) {
275fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius            if(suffixes.next(errorCode)) {
276fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius                ts = &suffixes.getString();
277fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius            } else {
278fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius                ts = &none;
279fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius            }
280fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius        }
281fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius        if(bs == NULL) {
282fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius            if(baseSuffixes.next(errorCode)) {
283fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius                bs = &baseSuffixes.getString();
284fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius            } else {
285fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius                bs = &none;
286fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius            }
287fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius        }
288fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius        if(ts == &none && bs == &none) { break; }
289fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius        int32_t cmp = ts->compare(*bs);
290fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius        if(cmp < 0) {
291fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius            // ts occurs in the tailoring but not in the base.
292fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius            addSuffix(c, *ts);
293fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius            ts = NULL;
294fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius        } else if(cmp > 0) {
295fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius            // bs occurs in the base but not in the tailoring.
296fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius            addSuffix(c, *bs);
297fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius            bs = NULL;
298fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius        } else {
299fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius            suffix = ts;
300fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius            compare(c, (uint32_t)suffixes.getValue(), (uint32_t)baseSuffixes.getValue());
301fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius            suffix = NULL;
302fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius            ts = NULL;
303fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius            bs = NULL;
304fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius        }
305fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius    }
306fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius}
307fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius
308fceb39872958b9fa2505e63f8b8699a9e0f882f4ccorneliusvoid
309fceb39872958b9fa2505e63f8b8699a9e0f882f4ccorneliusTailoredSet::addPrefixes(const CollationData *d, UChar32 c, const UChar *p) {
310fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius    UCharsTrie::Iterator prefixes(p, 0, errorCode);
311fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius    while(prefixes.next(errorCode)) {
312fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius        addPrefix(d, prefixes.getString(), c, (uint32_t)prefixes.getValue());
313fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius    }
314fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius}
315fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius
316fceb39872958b9fa2505e63f8b8699a9e0f882f4ccorneliusvoid
317fceb39872958b9fa2505e63f8b8699a9e0f882f4ccorneliusTailoredSet::addPrefix(const CollationData *d, const UnicodeString &pfx, UChar32 c, uint32_t ce32) {
318fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius    setPrefix(pfx);
319fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius    ce32 = d->getFinalCE32(ce32);
320fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius    if(Collation::isContractionCE32(ce32)) {
321fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius        const UChar *p = d->contexts + Collation::indexFromCE32(ce32);
322fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius        addContractions(c, p + 2);
323fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius    }
324fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius    tailored->add(UnicodeString(unreversedPrefix).append(c));
325fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius    resetPrefix();
326fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius}
327fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius
328fceb39872958b9fa2505e63f8b8699a9e0f882f4ccorneliusvoid
329fceb39872958b9fa2505e63f8b8699a9e0f882f4ccorneliusTailoredSet::addContractions(UChar32 c, const UChar *p) {
330fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius    UCharsTrie::Iterator suffixes(p, 0, errorCode);
331fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius    while(suffixes.next(errorCode)) {
332fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius        addSuffix(c, suffixes.getString());
333fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius    }
334fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius}
335fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius
336fceb39872958b9fa2505e63f8b8699a9e0f882f4ccorneliusvoid
337fceb39872958b9fa2505e63f8b8699a9e0f882f4ccorneliusTailoredSet::addSuffix(UChar32 c, const UnicodeString &sfx) {
338fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius    tailored->add(UnicodeString(unreversedPrefix).append(c).append(sfx));
339fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius}
340fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius
341fceb39872958b9fa2505e63f8b8699a9e0f882f4ccorneliusvoid
342fceb39872958b9fa2505e63f8b8699a9e0f882f4ccorneliusTailoredSet::add(UChar32 c) {
343fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius    if(unreversedPrefix.isEmpty() && suffix == NULL) {
344fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius        tailored->add(c);
345fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius    } else {
346fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius        UnicodeString s(unreversedPrefix);
347fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius        s.append(c);
348fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius        if(suffix != NULL) {
349fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius            s.append(*suffix);
350fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius        }
351fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius        tailored->add(s);
352fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius    }
353fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius}
354fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius
355fceb39872958b9fa2505e63f8b8699a9e0f882f4ccorneliusContractionsAndExpansions::CESink::~CESink() {}
356fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius
357fceb39872958b9fa2505e63f8b8699a9e0f882f4ccorneliusU_CDECL_BEGIN
358fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius
359fceb39872958b9fa2505e63f8b8699a9e0f882f4ccorneliusstatic UBool U_CALLCONV
360fceb39872958b9fa2505e63f8b8699a9e0f882f4ccorneliusenumCnERange(const void *context, UChar32 start, UChar32 end, uint32_t ce32) {
361fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius    ContractionsAndExpansions *cne = (ContractionsAndExpansions *)context;
362fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius    if(cne->checkTailored == 0) {
363fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius        // There is no tailoring.
364fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius        // No need to collect nor check the tailored set.
365fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius    } else if(cne->checkTailored < 0) {
366fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius        // Collect the set of code points with mappings in the tailoring data.
367fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius        if(ce32 == Collation::FALLBACK_CE32) {
368fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius            return TRUE;  // fallback to base, not tailored
369fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius        } else {
370fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius            cne->tailored.add(start, end);
371fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius        }
372fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius        // checkTailored > 0: Exclude tailored ranges from the base data enumeration.
373fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius    } else if(start == end) {
374fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius        if(cne->tailored.contains(start)) {
375fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius            return TRUE;
376fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius        }
377fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius    } else if(cne->tailored.containsSome(start, end)) {
378fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius        cne->ranges.set(start, end).removeAll(cne->tailored);
379fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius        int32_t count = cne->ranges.getRangeCount();
380fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius        for(int32_t i = 0; i < count; ++i) {
381fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius            cne->handleCE32(cne->ranges.getRangeStart(i), cne->ranges.getRangeEnd(i), ce32);
382fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius        }
383fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius        return U_SUCCESS(cne->errorCode);
384fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius    }
385fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius    cne->handleCE32(start, end, ce32);
386fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius    return U_SUCCESS(cne->errorCode);
387fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius}
388fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius
389fceb39872958b9fa2505e63f8b8699a9e0f882f4ccorneliusU_CDECL_END
390fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius
391fceb39872958b9fa2505e63f8b8699a9e0f882f4ccorneliusvoid
392fceb39872958b9fa2505e63f8b8699a9e0f882f4ccorneliusContractionsAndExpansions::forData(const CollationData *d, UErrorCode &ec) {
393fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius    if(U_FAILURE(ec)) { return; }
394fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius    errorCode = ec;  // Preserve info & warning codes.
395fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius    // Add all from the data, can be tailoring or base.
396fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius    if(d->base != NULL) {
397fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius        checkTailored = -1;
398fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius    }
399fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius    data = d;
400fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius    utrie2_enum(data->trie, NULL, enumCnERange, this);
401fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius    if(d->base == NULL || U_FAILURE(errorCode)) {
402fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius        ec = errorCode;
403fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius        return;
404fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius    }
405fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius    // Add all from the base data but only for un-tailored code points.
406fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius    tailored.freeze();
407fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius    checkTailored = 1;
408fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius    data = d->base;
409fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius    utrie2_enum(data->trie, NULL, enumCnERange, this);
410fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius    ec = errorCode;
411fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius}
412fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius
413fceb39872958b9fa2505e63f8b8699a9e0f882f4ccorneliusvoid
414fceb39872958b9fa2505e63f8b8699a9e0f882f4ccorneliusContractionsAndExpansions::forCodePoint(const CollationData *d, UChar32 c, UErrorCode &ec) {
415fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius    if(U_FAILURE(ec)) { return; }
416fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius    errorCode = ec;  // Preserve info & warning codes.
417fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius    uint32_t ce32 = d->getCE32(c);
418fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius    if(ce32 == Collation::FALLBACK_CE32) {
419fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius        d = d->base;
420fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius        ce32 = d->getCE32(c);
421fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius    }
422fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius    data = d;
423fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius    handleCE32(c, c, ce32);
424fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius    ec = errorCode;
425fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius}
426fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius
427fceb39872958b9fa2505e63f8b8699a9e0f882f4ccorneliusvoid
428fceb39872958b9fa2505e63f8b8699a9e0f882f4ccorneliusContractionsAndExpansions::handleCE32(UChar32 start, UChar32 end, uint32_t ce32) {
429fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius    for(;;) {
430fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius        if((ce32 & 0xff) < Collation::SPECIAL_CE32_LOW_BYTE) {
431fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius            // !isSpecialCE32()
432fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius            if(sink != NULL) {
433fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius                sink->handleCE(Collation::ceFromSimpleCE32(ce32));
434fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius            }
435fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius            return;
436fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius        }
437fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius        switch(Collation::tagFromCE32(ce32)) {
438fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius        case Collation::FALLBACK_TAG:
439fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius            return;
440fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius        case Collation::RESERVED_TAG_3:
441fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius        case Collation::BUILDER_DATA_TAG:
442fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius        case Collation::LEAD_SURROGATE_TAG:
443fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius            if(U_SUCCESS(errorCode)) { errorCode = U_INTERNAL_PROGRAM_ERROR; }
444fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius            return;
445fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius        case Collation::LONG_PRIMARY_TAG:
446fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius            if(sink != NULL) {
447fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius                sink->handleCE(Collation::ceFromLongPrimaryCE32(ce32));
448fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius            }
449fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius            return;
450fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius        case Collation::LONG_SECONDARY_TAG:
451fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius            if(sink != NULL) {
452fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius                sink->handleCE(Collation::ceFromLongSecondaryCE32(ce32));
453fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius            }
454fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius            return;
455fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius        case Collation::LATIN_EXPANSION_TAG:
456fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius            if(sink != NULL) {
457fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius                ces[0] = Collation::latinCE0FromCE32(ce32);
458fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius                ces[1] = Collation::latinCE1FromCE32(ce32);
459fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius                sink->handleExpansion(ces, 2);
460fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius            }
461fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius            // Optimization: If we have a prefix,
462fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius            // then the relevant strings have been added already.
463fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius            if(unreversedPrefix.isEmpty()) {
464fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius                addExpansions(start, end);
465fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius            }
466fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius            return;
467fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius        case Collation::EXPANSION32_TAG:
468fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius            if(sink != NULL) {
469fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius                const uint32_t *ce32s = data->ce32s + Collation::indexFromCE32(ce32);
470fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius                int32_t length = Collation::lengthFromCE32(ce32);
471fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius                for(int32_t i = 0; i < length; ++i) {
472fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius                    ces[i] = Collation::ceFromCE32(*ce32s++);
473fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius                }
474fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius                sink->handleExpansion(ces, length);
475fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius            }
476fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius            // Optimization: If we have a prefix,
477fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius            // then the relevant strings have been added already.
478fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius            if(unreversedPrefix.isEmpty()) {
479fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius                addExpansions(start, end);
480fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius            }
481fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius            return;
482fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius        case Collation::EXPANSION_TAG:
483fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius            if(sink != NULL) {
484fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius                int32_t length = Collation::lengthFromCE32(ce32);
485fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius                sink->handleExpansion(data->ces + Collation::indexFromCE32(ce32), length);
486fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius            }
487fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius            // Optimization: If we have a prefix,
488fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius            // then the relevant strings have been added already.
489fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius            if(unreversedPrefix.isEmpty()) {
490fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius                addExpansions(start, end);
491fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius            }
492fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius            return;
493fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius        case Collation::PREFIX_TAG:
494fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius            handlePrefixes(start, end, ce32);
495fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius            return;
496fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius        case Collation::CONTRACTION_TAG:
497fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius            handleContractions(start, end, ce32);
498fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius            return;
499fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius        case Collation::DIGIT_TAG:
500fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius            // Fetch the non-numeric-collation CE32 and continue.
501fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius            ce32 = data->ce32s[Collation::indexFromCE32(ce32)];
502fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius            break;
503fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius        case Collation::U0000_TAG:
504fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius            U_ASSERT(start == 0 && end == 0);
505fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius            // Fetch the normal ce32 for U+0000 and continue.
506fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius            ce32 = data->ce32s[0];
507fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius            break;
508fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius        case Collation::HANGUL_TAG:
509fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius            if(sink != NULL) {
510fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius                // TODO: This should be optimized,
511fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius                // especially if [start..end] is the complete Hangul range. (assert that)
512fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius                UTF16CollationIterator iter(data, FALSE, NULL, NULL, NULL);
513fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius                UChar hangul[1] = { 0 };
514fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius                for(UChar32 c = start; c <= end; ++c) {
515fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius                    hangul[0] = (UChar)c;
516fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius                    iter.setText(hangul, hangul + 1);
517fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius                    int32_t length = iter.fetchCEs(errorCode);
518fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius                    if(U_FAILURE(errorCode)) { return; }
519fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius                    // Ignore the terminating non-CE.
520fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius                    U_ASSERT(length >= 2 && iter.getCE(length - 1) == Collation::NO_CE);
521fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius                    sink->handleExpansion(iter.getCEs(), length - 1);
522fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius                }
523fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius            }
524fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius            // Optimization: If we have a prefix,
525fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius            // then the relevant strings have been added already.
526fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius            if(unreversedPrefix.isEmpty()) {
527fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius                addExpansions(start, end);
528fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius            }
529fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius            return;
530fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius        case Collation::OFFSET_TAG:
531fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius            // Currently no need to send offset CEs to the sink.
532fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius            return;
533fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius        case Collation::IMPLICIT_TAG:
534fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius            // Currently no need to send implicit CEs to the sink.
535fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius            return;
536fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius        }
537fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius    }
538fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius}
539fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius
540fceb39872958b9fa2505e63f8b8699a9e0f882f4ccorneliusvoid
541fceb39872958b9fa2505e63f8b8699a9e0f882f4ccorneliusContractionsAndExpansions::handlePrefixes(
542fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius        UChar32 start, UChar32 end, uint32_t ce32) {
543fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius    const UChar *p = data->contexts + Collation::indexFromCE32(ce32);
544fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius    ce32 = CollationData::readCE32(p);  // Default if no prefix match.
545fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius    handleCE32(start, end, ce32);
546fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius    if(!addPrefixes) { return; }
547fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius    UCharsTrie::Iterator prefixes(p + 2, 0, errorCode);
548fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius    while(prefixes.next(errorCode)) {
549fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius        setPrefix(prefixes.getString());
550fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius        // Prefix/pre-context mappings are special kinds of contractions
551fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius        // that always yield expansions.
552fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius        addStrings(start, end, contractions);
553fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius        addStrings(start, end, expansions);
554fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius        handleCE32(start, end, (uint32_t)prefixes.getValue());
555fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius    }
556fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius    resetPrefix();
557fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius}
558fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius
559fceb39872958b9fa2505e63f8b8699a9e0f882f4ccorneliusvoid
560fceb39872958b9fa2505e63f8b8699a9e0f882f4ccorneliusContractionsAndExpansions::handleContractions(
561fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius        UChar32 start, UChar32 end, uint32_t ce32) {
562fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius    const UChar *p = data->contexts + Collation::indexFromCE32(ce32);
563fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius    if((ce32 & Collation::CONTRACT_SINGLE_CP_NO_MATCH) != 0) {
564fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius        // No match on the single code point.
565fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius        // We are underneath a prefix, and the default mapping is just
566fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius        // a fallback to the mappings for a shorter prefix.
567fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius        U_ASSERT(!unreversedPrefix.isEmpty());
568fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius    } else {
569fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius        ce32 = CollationData::readCE32(p);  // Default if no suffix match.
570fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius        U_ASSERT(!Collation::isContractionCE32(ce32));
571fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius        handleCE32(start, end, ce32);
572fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius    }
573fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius    UCharsTrie::Iterator suffixes(p + 2, 0, errorCode);
574fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius    while(suffixes.next(errorCode)) {
575fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius        suffix = &suffixes.getString();
576fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius        addStrings(start, end, contractions);
577fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius        if(!unreversedPrefix.isEmpty()) {
578fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius            addStrings(start, end, expansions);
579fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius        }
580fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius        handleCE32(start, end, (uint32_t)suffixes.getValue());
581fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius    }
582fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius    suffix = NULL;
583fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius}
584fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius
585fceb39872958b9fa2505e63f8b8699a9e0f882f4ccorneliusvoid
586fceb39872958b9fa2505e63f8b8699a9e0f882f4ccorneliusContractionsAndExpansions::addExpansions(UChar32 start, UChar32 end) {
587fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius    if(unreversedPrefix.isEmpty() && suffix == NULL) {
588fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius        if(expansions != NULL) {
589fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius            expansions->add(start, end);
590fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius        }
591fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius    } else {
592fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius        addStrings(start, end, expansions);
593fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius    }
594fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius}
595fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius
596fceb39872958b9fa2505e63f8b8699a9e0f882f4ccorneliusvoid
597fceb39872958b9fa2505e63f8b8699a9e0f882f4ccorneliusContractionsAndExpansions::addStrings(UChar32 start, UChar32 end, UnicodeSet *set) {
598fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius    if(set == NULL) { return; }
599fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius    UnicodeString s(unreversedPrefix);
600fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius    do {
601fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius        s.append(start);
602fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius        if(suffix != NULL) {
603fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius            s.append(*suffix);
604fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius        }
605fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius        set->add(s);
606fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius        s.truncate(unreversedPrefix.length());
607fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius    } while(++start <= end);
608fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius}
609fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius
610fceb39872958b9fa2505e63f8b8699a9e0f882f4ccorneliusU_NAMESPACE_END
611fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius
612fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius#endif  // !UCONFIG_NO_COLLATION
613