1fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius/*
2fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius*******************************************************************************
3fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius* Copyright (C) 2012-2014, International Business Machines
4fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius* Corporation and others.  All Rights Reserved.
5fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius*******************************************************************************
6fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius* collationkeys.cpp
7fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius*
8fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius* created on: 2012sep02
9fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius* created by: Markus W. Scherer
10fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius*/
11fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius
12fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius#include "unicode/utypes.h"
13fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius
14fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius#if !UCONFIG_NO_COLLATION
15fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius
16fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius#include "unicode/bytestream.h"
17fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius#include "collation.h"
18fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius#include "collationiterator.h"
19fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius#include "collationkeys.h"
20fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius#include "collationsettings.h"
21fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius#include "uassert.h"
22fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius
23fceb39872958b9fa2505e63f8b8699a9e0f882f4ccorneliusU_NAMESPACE_BEGIN
24fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius
25fceb39872958b9fa2505e63f8b8699a9e0f882f4ccorneliusSortKeyByteSink::~SortKeyByteSink() {}
26fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius
27fceb39872958b9fa2505e63f8b8699a9e0f882f4ccorneliusvoid
28fceb39872958b9fa2505e63f8b8699a9e0f882f4ccorneliusSortKeyByteSink::Append(const char *bytes, int32_t n) {
29fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius    if (n <= 0 || bytes == NULL) {
30fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius        return;
31fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius    }
32fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius    if (ignore_ > 0) {
33fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius        int32_t ignoreRest = ignore_ - n;
34fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius        if (ignoreRest >= 0) {
35fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius            ignore_ = ignoreRest;
36fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius            return;
37fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius        } else {
38fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius            bytes += ignore_;
39fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius            n = -ignoreRest;
40fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius            ignore_ = 0;
41fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius        }
42fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius    }
43fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius    int32_t length = appended_;
44fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius    appended_ += n;
45fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius    if ((buffer_ + length) == bytes) {
46fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius        return;  // the caller used GetAppendBuffer() and wrote the bytes already
47fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius    }
48fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius    int32_t available = capacity_ - length;
49fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius    if (n <= available) {
50fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius        uprv_memcpy(buffer_ + length, bytes, n);
51fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius    } else {
52fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius        AppendBeyondCapacity(bytes, n, length);
53fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius    }
54fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius}
55fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius
56fceb39872958b9fa2505e63f8b8699a9e0f882f4ccorneliuschar *
57fceb39872958b9fa2505e63f8b8699a9e0f882f4ccorneliusSortKeyByteSink::GetAppendBuffer(int32_t min_capacity,
58fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius                                 int32_t desired_capacity_hint,
59fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius                                 char *scratch,
60fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius                                 int32_t scratch_capacity,
61fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius                                 int32_t *result_capacity) {
62fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius    if (min_capacity < 1 || scratch_capacity < min_capacity) {
63fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius        *result_capacity = 0;
64fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius        return NULL;
65fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius    }
66fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius    if (ignore_ > 0) {
67fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius        // Do not write ignored bytes right at the end of the buffer.
68fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius        *result_capacity = scratch_capacity;
69fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius        return scratch;
70fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius    }
71fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius    int32_t available = capacity_ - appended_;
72fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius    if (available >= min_capacity) {
73fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius        *result_capacity = available;
74fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius        return buffer_ + appended_;
75fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius    } else if (Resize(desired_capacity_hint, appended_)) {
76fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius        *result_capacity = capacity_ - appended_;
77fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius        return buffer_ + appended_;
78fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius    } else {
79fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius        *result_capacity = scratch_capacity;
80fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius        return scratch;
81fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius    }
82fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius}
83fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius
84fceb39872958b9fa2505e63f8b8699a9e0f882f4ccorneliusnamespace {
85fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius
86fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius/**
87fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius * uint8_t byte buffer, similar to CharString but simpler.
88fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius */
89fceb39872958b9fa2505e63f8b8699a9e0f882f4ccorneliusclass SortKeyLevel : public UMemory {
90fceb39872958b9fa2505e63f8b8699a9e0f882f4ccorneliuspublic:
91fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius    SortKeyLevel() : len(0), ok(TRUE) {}
92fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius    ~SortKeyLevel() {}
93fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius
94fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius    /** @return FALSE if memory allocation failed */
95fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius    UBool isOk() const { return ok; }
96fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius    UBool isEmpty() const { return len == 0; }
97fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius    int32_t length() const { return len; }
98fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius    const uint8_t *data() const { return buffer.getAlias(); }
99fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius    uint8_t operator[](int32_t index) const { return buffer[index]; }
100fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius
101fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius    uint8_t *data() { return buffer.getAlias(); }
102fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius
103fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius    void appendByte(uint32_t b);
104fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius    void appendWeight16(uint32_t w);
105fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius    void appendWeight32(uint32_t w);
106fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius    void appendReverseWeight16(uint32_t w);
107fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius
108fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius    /** Appends all but the last byte to the sink. The last byte should be the 01 terminator. */
109fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius    void appendTo(ByteSink &sink) const {
110fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius        U_ASSERT(len > 0 && buffer[len - 1] == 1);
111fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius        sink.Append(reinterpret_cast<const char *>(buffer.getAlias()), len - 1);
112fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius    }
113fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius
114fceb39872958b9fa2505e63f8b8699a9e0f882f4ccorneliusprivate:
115fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius    MaybeStackArray<uint8_t, 40> buffer;
116fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius    int32_t len;
117fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius    UBool ok;
118fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius
119fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius    UBool ensureCapacity(int32_t appendCapacity);
120fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius
121fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius    SortKeyLevel(const SortKeyLevel &other); // forbid copying of this class
122fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius    SortKeyLevel &operator=(const SortKeyLevel &other); // forbid copying of this class
123fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius};
124fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius
125fceb39872958b9fa2505e63f8b8699a9e0f882f4ccorneliusvoid SortKeyLevel::appendByte(uint32_t b) {
126fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius    if(len < buffer.getCapacity() || ensureCapacity(1)) {
127fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius        buffer[len++] = (uint8_t)b;
128fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius    }
129fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius}
130fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius
131fceb39872958b9fa2505e63f8b8699a9e0f882f4ccorneliusvoid
132fceb39872958b9fa2505e63f8b8699a9e0f882f4ccorneliusSortKeyLevel::appendWeight16(uint32_t w) {
133fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius    U_ASSERT((w & 0xffff) != 0);
134fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius    uint8_t b0 = (uint8_t)(w >> 8);
135fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius    uint8_t b1 = (uint8_t)w;
136fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius    int32_t appendLength = (b1 == 0) ? 1 : 2;
137fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius    if((len + appendLength) <= buffer.getCapacity() || ensureCapacity(appendLength)) {
138fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius        buffer[len++] = b0;
139fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius        if(b1 != 0) {
140fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius            buffer[len++] = b1;
141fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius        }
142fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius    }
143fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius}
144fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius
145fceb39872958b9fa2505e63f8b8699a9e0f882f4ccorneliusvoid
146fceb39872958b9fa2505e63f8b8699a9e0f882f4ccorneliusSortKeyLevel::appendWeight32(uint32_t w) {
147fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius    U_ASSERT(w != 0);
148fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius    uint8_t bytes[4] = { (uint8_t)(w >> 24), (uint8_t)(w >> 16), (uint8_t)(w >> 8), (uint8_t)w };
149fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius    int32_t appendLength = (bytes[1] == 0) ? 1 : (bytes[2] == 0) ? 2 : (bytes[3] == 0) ? 3 : 4;
150fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius    if((len + appendLength) <= buffer.getCapacity() || ensureCapacity(appendLength)) {
151fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius        buffer[len++] = bytes[0];
152fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius        if(bytes[1] != 0) {
153fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius            buffer[len++] = bytes[1];
154fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius            if(bytes[2] != 0) {
155fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius                buffer[len++] = bytes[2];
156fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius                if(bytes[3] != 0) {
157fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius                    buffer[len++] = bytes[3];
158fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius                }
159fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius            }
160fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius        }
161fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius    }
162fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius}
163fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius
164fceb39872958b9fa2505e63f8b8699a9e0f882f4ccorneliusvoid
165fceb39872958b9fa2505e63f8b8699a9e0f882f4ccorneliusSortKeyLevel::appendReverseWeight16(uint32_t w) {
166fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius    U_ASSERT((w & 0xffff) != 0);
167fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius    uint8_t b0 = (uint8_t)(w >> 8);
168fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius    uint8_t b1 = (uint8_t)w;
169fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius    int32_t appendLength = (b1 == 0) ? 1 : 2;
170fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius    if((len + appendLength) <= buffer.getCapacity() || ensureCapacity(appendLength)) {
171fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius        if(b1 == 0) {
172fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius            buffer[len++] = b0;
173fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius        } else {
174fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius            buffer[len] = b1;
175fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius            buffer[len + 1] = b0;
176fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius            len += 2;
177fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius        }
178fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius    }
179fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius}
180fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius
181fceb39872958b9fa2505e63f8b8699a9e0f882f4ccorneliusUBool SortKeyLevel::ensureCapacity(int32_t appendCapacity) {
182fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius    if(!ok) {
183fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius        return FALSE;
184fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius    }
185fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius    int32_t newCapacity = 2 * buffer.getCapacity();
186fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius    int32_t altCapacity = len + 2 * appendCapacity;
187fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius    if (newCapacity < altCapacity) {
188fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius        newCapacity = altCapacity;
189fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius    }
190fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius    if (newCapacity < 200) {
191fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius        newCapacity = 200;
192fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius    }
193fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius    if(buffer.resize(newCapacity, len)==NULL) {
194fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius        return ok = FALSE;
195fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius    }
196fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius    return TRUE;
197fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius}
198fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius
199fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius}  // namespace
200fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius
201fceb39872958b9fa2505e63f8b8699a9e0f882f4ccorneliusCollationKeys::LevelCallback::~LevelCallback() {}
202fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius
203fceb39872958b9fa2505e63f8b8699a9e0f882f4ccorneliusUBool
204fceb39872958b9fa2505e63f8b8699a9e0f882f4ccorneliusCollationKeys::LevelCallback::needToWrite(Collation::Level /*level*/) { return TRUE; }
205fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius
206fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius/**
207fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius * Map from collation strength (UColAttributeValue)
208fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius * to a mask of Collation::Level bits up to that strength,
209fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius * excluding the CASE_LEVEL which is independent of the strength,
210fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius * and excluding IDENTICAL_LEVEL which this function does not write.
211fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius */
212fceb39872958b9fa2505e63f8b8699a9e0f882f4ccorneliusstatic const uint32_t levelMasks[UCOL_STRENGTH_LIMIT] = {
213fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius    2,          // UCOL_PRIMARY -> PRIMARY_LEVEL
214fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius    6,          // UCOL_SECONDARY -> up to SECONDARY_LEVEL
215fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius    0x16,       // UCOL_TERTIARY -> up to TERTIARY_LEVEL
216fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius    0x36,       // UCOL_QUATERNARY -> up to QUATERNARY_LEVEL
217fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius    0, 0, 0, 0,
218fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius    0, 0, 0, 0,
219fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius    0, 0, 0,
220fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius    0x36        // UCOL_IDENTICAL -> up to QUATERNARY_LEVEL
221fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius};
222fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius
223fceb39872958b9fa2505e63f8b8699a9e0f882f4ccorneliusvoid
224fceb39872958b9fa2505e63f8b8699a9e0f882f4ccorneliusCollationKeys::writeSortKeyUpToQuaternary(CollationIterator &iter,
225fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius                                          const UBool *compressibleBytes,
226fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius                                          const CollationSettings &settings,
227fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius                                          SortKeyByteSink &sink,
228fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius                                          Collation::Level minLevel, LevelCallback &callback,
229fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius                                          UBool preflight, UErrorCode &errorCode) {
230fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius    if(U_FAILURE(errorCode)) { return; }
231fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius
232fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius    int32_t options = settings.options;
233fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius    // Set of levels to process and write.
234fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius    uint32_t levels = levelMasks[CollationSettings::getStrength(options)];
235fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius    if((options & CollationSettings::CASE_LEVEL) != 0) {
236fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius        levels |= Collation::CASE_LEVEL_FLAG;
237fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius    }
238fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius    // Minus the levels below minLevel.
239fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius    levels &= ~(((uint32_t)1 << minLevel) - 1);
240fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius    if(levels == 0) { return; }
241fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius
242fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius    uint32_t variableTop;
243fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius    if((options & CollationSettings::ALTERNATE_MASK) == 0) {
244fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius        variableTop = 0;
245fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius    } else {
246fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius        // +1 so that we can use "<" and primary ignorables test out early.
247fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius        variableTop = settings.variableTop + 1;
248fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius    }
249fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius    const uint8_t *reorderTable = settings.reorderTable;
250fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius
251fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius    uint32_t tertiaryMask = CollationSettings::getTertiaryMask(options);
252fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius
253fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius    SortKeyLevel cases;
254fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius    SortKeyLevel secondaries;
255fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius    SortKeyLevel tertiaries;
256fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius    SortKeyLevel quaternaries;
257fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius
258fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius    uint32_t compressedP1 = 0;  // 0==no compression; otherwise reordered compressible lead byte
259fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius    int32_t commonCases = 0;
260fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius    int32_t commonSecondaries = 0;
261fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius    int32_t commonTertiaries = 0;
262fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius    int32_t commonQuaternaries = 0;
263fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius
264fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius    uint32_t prevSecondary = 0;
265fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius    UBool anyMergeSeparators = FALSE;
266fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius
267fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius    for(;;) {
268fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius        // No need to keep all CEs in the buffer when we write a sort key.
269fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius        iter.clearCEsIfNoneRemaining();
270fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius        int64_t ce = iter.nextCE(errorCode);
271fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius        uint32_t p = (uint32_t)(ce >> 32);
272fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius        if(p < variableTop && p > Collation::MERGE_SEPARATOR_PRIMARY) {
273fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius            // Variable CE, shift it to quaternary level.
274fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius            // Ignore all following primary ignorables, and shift further variable CEs.
275fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius            if(commonQuaternaries != 0) {
276fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius                --commonQuaternaries;
277fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius                while(commonQuaternaries >= QUAT_COMMON_MAX_COUNT) {
278fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius                    quaternaries.appendByte(QUAT_COMMON_MIDDLE);
279fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius                    commonQuaternaries -= QUAT_COMMON_MAX_COUNT;
280fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius                }
281fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius                // Shifted primary weights are lower than the common weight.
282fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius                quaternaries.appendByte(QUAT_COMMON_LOW + commonQuaternaries);
283fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius                commonQuaternaries = 0;
284fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius            }
285fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius            do {
286fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius                if((levels & Collation::QUATERNARY_LEVEL_FLAG) != 0) {
287fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius                    uint32_t p1 = p >> 24;
288fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius                    if(reorderTable != NULL) { p1 = reorderTable[p1]; }
289fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius                    if(p1 >= QUAT_SHIFTED_LIMIT_BYTE) {
290fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius                        // Prevent shifted primary lead bytes from
291fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius                        // overlapping with the common compression range.
292fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius                        quaternaries.appendByte(QUAT_SHIFTED_LIMIT_BYTE);
293fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius                    }
294fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius                    quaternaries.appendWeight32((p1 << 24) | (p & 0xffffff));
295fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius                }
296fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius                do {
297fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius                    ce = iter.nextCE(errorCode);
298fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius                    p = (uint32_t)(ce >> 32);
299fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius                } while(p == 0);
300fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius            } while(p < variableTop && p > Collation::MERGE_SEPARATOR_PRIMARY);
301fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius        }
302fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius        // ce could be primary ignorable, or NO_CE, or the merge separator,
303fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius        // or a regular primary CE, but it is not variable.
304fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius        // If ce==NO_CE, then write nothing for the primary level but
305fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius        // terminate compression on all levels and then exit the loop.
306fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius        if(p > Collation::NO_CE_PRIMARY && (levels & Collation::PRIMARY_LEVEL_FLAG) != 0) {
307fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius            uint32_t p1 = p >> 24;
308fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius            if(reorderTable != NULL) { p1 = reorderTable[p1]; }
309fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius            if(p1 != compressedP1) {
310fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius                if(compressedP1 != 0) {
311fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius                    if(p1 < compressedP1) {
312fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius                        // No primary compression terminator
313fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius                        // at the end of the level or merged segment.
314fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius                        if(p1 > Collation::MERGE_SEPARATOR_BYTE) {
315fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius                            sink.Append(Collation::PRIMARY_COMPRESSION_LOW_BYTE);
316fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius                        }
317fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius                    } else {
318fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius                        sink.Append(Collation::PRIMARY_COMPRESSION_HIGH_BYTE);
319fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius                    }
320fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius                }
321fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius                sink.Append(p1);
322fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius                // Test the un-reordered lead byte for compressibility but
323fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius                // remember the reordered lead byte.
324fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius                if(compressibleBytes[p >> 24]) {
325fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius                    compressedP1 = p1;
326fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius                } else {
327fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius                    compressedP1 = 0;
328fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius                }
329fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius            }
330fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius            char p2 = (char)(p >> 16);
331fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius            if(p2 != 0) {
332fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius                char buffer[3] = { p2, (char)(p >> 8), (char)p };
333fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius                sink.Append(buffer, (buffer[1] == 0) ? 1 : (buffer[2] == 0) ? 2 : 3);
334fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius            }
335fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius            // Optimization for internalNextSortKeyPart():
336fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius            // When the primary level overflows we can stop because we need not
337fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius            // calculate (preflight) the whole sort key length.
338fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius            if(!preflight && sink.Overflowed()) {
339fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius                if(U_SUCCESS(errorCode) && !sink.IsOk()) {
340fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius                    errorCode = U_MEMORY_ALLOCATION_ERROR;
341fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius                }
342fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius                return;
343fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius            }
344fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius        }
345fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius
346fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius        uint32_t lower32 = (uint32_t)ce;
347fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius        if(lower32 == 0) { continue; }  // completely ignorable, no secondary/case/tertiary/quaternary
348fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius
349fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius        if((levels & Collation::SECONDARY_LEVEL_FLAG) != 0) {
350fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius            uint32_t s = lower32 >> 16;
351fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius            if(s == 0) {
352fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius                // secondary ignorable
353fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius            } else if(s == Collation::COMMON_WEIGHT16) {
354fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius                ++commonSecondaries;
355fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius            } else if((options & CollationSettings::BACKWARD_SECONDARY) == 0) {
356fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius                if(commonSecondaries != 0) {
357fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius                    --commonSecondaries;
358fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius                    while(commonSecondaries >= SEC_COMMON_MAX_COUNT) {
359fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius                        secondaries.appendByte(SEC_COMMON_MIDDLE);
360fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius                        commonSecondaries -= SEC_COMMON_MAX_COUNT;
361fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius                    }
362fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius                    uint32_t b;
363fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius                    if(s < Collation::COMMON_WEIGHT16) {
364fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius                        b = SEC_COMMON_LOW + commonSecondaries;
365fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius                    } else {
366fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius                        b = SEC_COMMON_HIGH - commonSecondaries;
367fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius                    }
368fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius                    secondaries.appendByte(b);
369fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius                    commonSecondaries = 0;
370fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius                }
371fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius                secondaries.appendWeight16(s);
372fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius            } else {
373fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius                if(commonSecondaries != 0) {
374fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius                    --commonSecondaries;
375fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius                    // Append reverse weights. The level will be re-reversed later.
376fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius                    int32_t remainder = commonSecondaries % SEC_COMMON_MAX_COUNT;
377fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius                    uint32_t b;
378fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius                    if(prevSecondary < Collation::COMMON_WEIGHT16) {
379fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius                        b = SEC_COMMON_LOW + remainder;
380fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius                    } else {
381fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius                        b = SEC_COMMON_HIGH - remainder;
382fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius                    }
383fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius                    secondaries.appendByte(b);
384fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius                    commonSecondaries -= remainder;
385fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius                    // commonSecondaries is now a multiple of SEC_COMMON_MAX_COUNT.
386fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius                    while(commonSecondaries > 0) {  // same as >= SEC_COMMON_MAX_COUNT
387fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius                        secondaries.appendByte(SEC_COMMON_MIDDLE);
388fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius                        commonSecondaries -= SEC_COMMON_MAX_COUNT;
389fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius                    }
390fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius                    // commonSecondaries == 0
391fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius                }
392fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius                // Reduce separators so that we can look for byte<=1 later.
393fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius                if(s <= Collation::MERGE_SEPARATOR_WEIGHT16) {
394fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius                    if(s == Collation::MERGE_SEPARATOR_WEIGHT16) {
395fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius                        anyMergeSeparators = TRUE;
396fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius                    }
397fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius                    secondaries.appendByte((s >> 8) - 1);
398fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius                } else {
399fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius                    secondaries.appendReverseWeight16(s);
400fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius                }
401fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius                prevSecondary = s;
402fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius            }
403fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius        }
404fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius
405fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius        if((levels & Collation::CASE_LEVEL_FLAG) != 0) {
406fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius            if((CollationSettings::getStrength(options) == UCOL_PRIMARY) ?
407fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius                    p == 0 : lower32 <= 0xffff) {
408fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius                // Primary+caseLevel: Ignore case level weights of primary ignorables.
409fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius                // Otherwise: Ignore case level weights of secondary ignorables.
410fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius                // For details see the comments in the CollationCompare class.
411fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius            } else {
412fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius                uint32_t c = (lower32 >> 8) & 0xff;  // case bits & tertiary lead byte
413fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius                U_ASSERT((c & 0xc0) != 0xc0);
414fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius                if((c & 0xc0) == 0 && c > Collation::MERGE_SEPARATOR_BYTE) {
415fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius                    ++commonCases;
416fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius                } else {
417fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius                    if((options & CollationSettings::UPPER_FIRST) == 0) {
418fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius                        // lowerFirst: Compress common weights to nibbles 1..7..13, mixed=14, upper=15.
419fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius                        if(commonCases != 0) {
420fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius                            --commonCases;
421fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius                            while(commonCases >= CASE_LOWER_FIRST_COMMON_MAX_COUNT) {
422fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius                                cases.appendByte(CASE_LOWER_FIRST_COMMON_MIDDLE << 4);
423fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius                                commonCases -= CASE_LOWER_FIRST_COMMON_MAX_COUNT;
424fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius                            }
425fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius                            uint32_t b;
426fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius                            if(c <= Collation::MERGE_SEPARATOR_BYTE) {
427fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius                                b = CASE_LOWER_FIRST_COMMON_LOW + commonCases;
428fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius                            } else {
429fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius                                b = CASE_LOWER_FIRST_COMMON_HIGH - commonCases;
430fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius                            }
431fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius                            cases.appendByte(b << 4);
432fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius                            commonCases = 0;
433fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius                        }
434fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius                        if(c > Collation::MERGE_SEPARATOR_BYTE) {
435fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius                            c = (CASE_LOWER_FIRST_COMMON_HIGH + (c >> 6)) << 4;  // 14 or 15
436fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius                        }
437fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius                    } else {
438fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius                        // upperFirst: Compress common weights to nibbles 3..15, mixed=2, upper=1.
439fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius                        // The compressed common case weights only go up from the "low" value
440fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius                        // because with upperFirst the common weight is the highest one.
441fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius                        if(commonCases != 0) {
442fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius                            --commonCases;
443fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius                            while(commonCases >= CASE_UPPER_FIRST_COMMON_MAX_COUNT) {
444fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius                                cases.appendByte(CASE_UPPER_FIRST_COMMON_LOW << 4);
445fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius                                commonCases -= CASE_UPPER_FIRST_COMMON_MAX_COUNT;
446fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius                            }
447fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius                            cases.appendByte((CASE_UPPER_FIRST_COMMON_LOW + commonCases) << 4);
448fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius                            commonCases = 0;
449fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius                        }
450fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius                        if(c > Collation::MERGE_SEPARATOR_BYTE) {
451fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius                            c = (CASE_UPPER_FIRST_COMMON_LOW - (c >> 6)) << 4;  // 2 or 1
452fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius                        }
453fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius                    }
454fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius                    // c is a separator byte 01 or 02,
455fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius                    // or a left-shifted nibble 0x10, 0x20, ... 0xf0.
456fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius                    cases.appendByte(c);
457fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius                }
458fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius            }
459fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius        }
460fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius
461fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius        if((levels & Collation::TERTIARY_LEVEL_FLAG) != 0) {
462fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius            uint32_t t = lower32 & tertiaryMask;
463fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius            U_ASSERT((lower32 & 0xc000) != 0xc000);
464fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius            if(t == Collation::COMMON_WEIGHT16) {
465fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius                ++commonTertiaries;
466fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius            } else if((tertiaryMask & 0x8000) == 0) {
467fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius                // Tertiary weights without case bits.
468fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius                // Move lead bytes 06..3F to C6..FF for a large common-weight range.
469fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius                if(commonTertiaries != 0) {
470fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius                    --commonTertiaries;
471fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius                    while(commonTertiaries >= TER_ONLY_COMMON_MAX_COUNT) {
472fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius                        tertiaries.appendByte(TER_ONLY_COMMON_MIDDLE);
473fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius                        commonTertiaries -= TER_ONLY_COMMON_MAX_COUNT;
474fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius                    }
475fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius                    uint32_t b;
476fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius                    if(t < Collation::COMMON_WEIGHT16) {
477fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius                        b = TER_ONLY_COMMON_LOW + commonTertiaries;
478fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius                    } else {
479fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius                        b = TER_ONLY_COMMON_HIGH - commonTertiaries;
480fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius                    }
481fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius                    tertiaries.appendByte(b);
482fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius                    commonTertiaries = 0;
483fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius                }
484fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius                if(t > Collation::COMMON_WEIGHT16) { t += 0xc000; }
485fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius                tertiaries.appendWeight16(t);
486fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius            } else if((options & CollationSettings::UPPER_FIRST) == 0) {
487fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius                // Tertiary weights with caseFirst=lowerFirst.
488fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius                // Move lead bytes 06..BF to 46..FF for the common-weight range.
489fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius                if(commonTertiaries != 0) {
490fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius                    --commonTertiaries;
491fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius                    while(commonTertiaries >= TER_LOWER_FIRST_COMMON_MAX_COUNT) {
492fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius                        tertiaries.appendByte(TER_LOWER_FIRST_COMMON_MIDDLE);
493fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius                        commonTertiaries -= TER_LOWER_FIRST_COMMON_MAX_COUNT;
494fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius                    }
495fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius                    uint32_t b;
496fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius                    if(t < Collation::COMMON_WEIGHT16) {
497fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius                        b = TER_LOWER_FIRST_COMMON_LOW + commonTertiaries;
498fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius                    } else {
499fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius                        b = TER_LOWER_FIRST_COMMON_HIGH - commonTertiaries;
500fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius                    }
501fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius                    tertiaries.appendByte(b);
502fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius                    commonTertiaries = 0;
503fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius                }
504fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius                if(t > Collation::COMMON_WEIGHT16) { t += 0x4000; }
505fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius                tertiaries.appendWeight16(t);
506fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius            } else {
507fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius                // Tertiary weights with caseFirst=upperFirst.
508fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius                // Do not change the artificial uppercase weight of a tertiary CE (0.0.ut),
509fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius                // to keep tertiary CEs well-formed.
510fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius                // Their case+tertiary weights must be greater than those of
511fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius                // primary and secondary CEs.
512fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius                //
513fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius                // Separators    01..02 -> 01..02  (unchanged)
514fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius                // Lowercase     03..04 -> 83..84  (includes uncased)
515fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius                // Common weight     05 -> 85..C5  (common-weight compression range)
516fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius                // Lowercase     06..3F -> C6..FF
517fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius                // Mixed case    43..7F -> 43..7F
518fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius                // Uppercase     83..BF -> 03..3F
519fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius                // Tertiary CE   86..BF -> C6..FF
520fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius                if(t <= Collation::MERGE_SEPARATOR_WEIGHT16) {
521fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius                    // Keep separators unchanged.
522fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius                } else if(lower32 > 0xffff) {
523fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius                    // Invert case bits of primary & secondary CEs.
524fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius                    t ^= 0xc000;
525fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius                    if(t < (TER_UPPER_FIRST_COMMON_HIGH << 8)) {
526fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius                        t -= 0x4000;
527fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius                    }
528fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius                } else {
529fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius                    // Keep uppercase bits of tertiary CEs.
530fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius                    U_ASSERT(0x8600 <= t && t <= 0xbfff);
531fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius                    t += 0x4000;
532fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius                }
533fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius                if(commonTertiaries != 0) {
534fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius                    --commonTertiaries;
535fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius                    while(commonTertiaries >= TER_UPPER_FIRST_COMMON_MAX_COUNT) {
536fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius                        tertiaries.appendByte(TER_UPPER_FIRST_COMMON_MIDDLE);
537fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius                        commonTertiaries -= TER_UPPER_FIRST_COMMON_MAX_COUNT;
538fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius                    }
539fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius                    uint32_t b;
540fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius                    if(t < (TER_UPPER_FIRST_COMMON_LOW << 8)) {
541fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius                        b = TER_UPPER_FIRST_COMMON_LOW + commonTertiaries;
542fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius                    } else {
543fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius                        b = TER_UPPER_FIRST_COMMON_HIGH - commonTertiaries;
544fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius                    }
545fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius                    tertiaries.appendByte(b);
546fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius                    commonTertiaries = 0;
547fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius                }
548fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius                tertiaries.appendWeight16(t);
549fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius            }
550fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius        }
551fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius
552fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius        if((levels & Collation::QUATERNARY_LEVEL_FLAG) != 0) {
553fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius            uint32_t q = lower32 & 0xffff;
554fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius            if((q & 0xc0) == 0 && q > Collation::MERGE_SEPARATOR_WEIGHT16) {
555fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius                ++commonQuaternaries;
556fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius            } else if(q <= Collation::MERGE_SEPARATOR_WEIGHT16 &&
557fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius                    (options & CollationSettings::ALTERNATE_MASK) == 0 &&
558fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius                    (quaternaries.isEmpty() ||
559fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius                        quaternaries[quaternaries.length() - 1] == Collation::MERGE_SEPARATOR_BYTE)) {
560fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius                // If alternate=non-ignorable and there are only
561fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius                // common quaternary weights between two separators,
562fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius                // then we need not write anything between these separators.
563fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius                // The only weights greater than the merge separator and less than the common weight
564fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius                // are shifted primary weights, which are not generated for alternate=non-ignorable.
565fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius                // There are also exactly as many quaternary weights as tertiary weights,
566fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius                // so level length differences are handled already on tertiary level.
567fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius                // Any above-common quaternary weight will compare greater regardless.
568fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius                quaternaries.appendByte(q >> 8);
569fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius            } else {
570fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius                if(q <= Collation::MERGE_SEPARATOR_WEIGHT16) {
571fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius                    q >>= 8;
572fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius                } else {
573fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius                    q = 0xfc + ((q >> 6) & 3);
574fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius                }
575fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius                if(commonQuaternaries != 0) {
576fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius                    --commonQuaternaries;
577fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius                    while(commonQuaternaries >= QUAT_COMMON_MAX_COUNT) {
578fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius                        quaternaries.appendByte(QUAT_COMMON_MIDDLE);
579fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius                        commonQuaternaries -= QUAT_COMMON_MAX_COUNT;
580fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius                    }
581fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius                    uint32_t b;
582fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius                    if(q < QUAT_COMMON_LOW) {
583fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius                        b = QUAT_COMMON_LOW + commonQuaternaries;
584fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius                    } else {
585fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius                        b = QUAT_COMMON_HIGH - commonQuaternaries;
586fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius                    }
587fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius                    quaternaries.appendByte(b);
588fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius                    commonQuaternaries = 0;
589fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius                }
590fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius                quaternaries.appendByte(q);
591fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius            }
592fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius        }
593fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius
594fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius        if((lower32 >> 24) == Collation::LEVEL_SEPARATOR_BYTE) { break; }  // ce == NO_CE
595fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius    }
596fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius
597fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius    if(U_FAILURE(errorCode)) { return; }
598fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius
599fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius    // Append the beyond-primary levels.
600fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius    UBool ok = TRUE;
601fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius    if((levels & Collation::SECONDARY_LEVEL_FLAG) != 0) {
602fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius        if(!callback.needToWrite(Collation::SECONDARY_LEVEL)) { return; }
603fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius        ok &= secondaries.isOk();
604fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius        sink.Append(Collation::LEVEL_SEPARATOR_BYTE);
605fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius        uint8_t *secs = secondaries.data();
606fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius        int32_t length = secondaries.length() - 1;  // Ignore the trailing NO_CE.
607fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius        if((options & CollationSettings::BACKWARD_SECONDARY) != 0) {
608fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius            // The backwards secondary level compares secondary weights backwards
609fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius            // within segments separated by the merge separator (U+FFFE, weight 02).
610fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius            // The separator weights 01 & 02 were reduced to 00 & 01 so that
611fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius            // we do not accidentally separate at a _second_ weight byte of 02.
612fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius            int32_t start = 0;
613fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius            for(;;) {
614fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius                // Find the merge separator or the NO_CE terminator.
615fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius                int32_t limit;
616fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius                if(anyMergeSeparators) {
617fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius                    limit = start;
618fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius                    while(secs[limit] > 1) { ++limit; }
619fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius                } else {
620fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius                    limit = length;
621fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius                }
622fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius                // Reverse this segment.
623fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius                if(start < limit) {
624fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius                    uint8_t *p = secs + start;
625fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius                    uint8_t *q = secs + limit - 1;
626fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius                    while(p < q) {
627fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius                        uint8_t s = *p;
628fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius                        *p++ = *q;
629fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius                        *q-- = s;
630fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius                    }
631fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius                }
632fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius                // Did we reach the end of the string?
633fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius                if(secs[limit] == 0) { break; }
634fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius                // Restore the merge separator.
635fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius                secs[limit] = 2;
636fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius                // Skip the merge separator and continue.
637fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius                start = limit + 1;
638fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius            }
639fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius        }
640fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius        sink.Append(reinterpret_cast<char *>(secs), length);
641fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius    }
642fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius
643fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius    if((levels & Collation::CASE_LEVEL_FLAG) != 0) {
644fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius        if(!callback.needToWrite(Collation::CASE_LEVEL)) { return; }
645fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius        ok &= cases.isOk();
646fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius        sink.Append(Collation::LEVEL_SEPARATOR_BYTE);
647fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius        // Write pairs of nibbles as bytes, except separator bytes as themselves.
648fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius        int32_t length = cases.length() - 1;  // Ignore the trailing NO_CE.
649fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius        uint8_t b = 0;
650fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius        for(int32_t i = 0; i < length; ++i) {
651fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius            uint8_t c = (uint8_t)cases[i];
652fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius            if(c <= Collation::MERGE_SEPARATOR_BYTE) {
653fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius                U_ASSERT(c != 0);
654fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius                if(b != 0) {
655fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius                    sink.Append(b);
656fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius                    b = 0;
657fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius                }
658fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius                sink.Append(c);
659fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius            } else {
660fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius                U_ASSERT((c & 0xf) == 0);
661fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius                if(b == 0) {
662fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius                    b = c;
663fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius                } else {
664fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius                    sink.Append(b | (c >> 4));
665fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius                    b = 0;
666fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius                }
667fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius            }
668fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius        }
669fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius        if(b != 0) {
670fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius            sink.Append(b);
671fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius        }
672fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius    }
673fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius
674fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius    if((levels & Collation::TERTIARY_LEVEL_FLAG) != 0) {
675fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius        if(!callback.needToWrite(Collation::TERTIARY_LEVEL)) { return; }
676fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius        ok &= tertiaries.isOk();
677fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius        sink.Append(Collation::LEVEL_SEPARATOR_BYTE);
678fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius        tertiaries.appendTo(sink);
679fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius    }
680fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius
681fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius    if((levels & Collation::QUATERNARY_LEVEL_FLAG) != 0) {
682fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius        if(!callback.needToWrite(Collation::QUATERNARY_LEVEL)) { return; }
683fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius        ok &= quaternaries.isOk();
684fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius        sink.Append(Collation::LEVEL_SEPARATOR_BYTE);
685fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius        quaternaries.appendTo(sink);
686fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius    }
687fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius
688fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius    if(!ok || !sink.IsOk()) {
689fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius        errorCode = U_MEMORY_ALLOCATION_ERROR;
690fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius    }
691fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius}
692fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius
693fceb39872958b9fa2505e63f8b8699a9e0f882f4ccorneliusU_NAMESPACE_END
694fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius
695fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius#endif  // !UCONFIG_NO_COLLATION
696