1// © 2016 and later: Unicode, Inc. and others.
2// License & terms of use: http://www.unicode.org/copyright.html
3/*
4*******************************************************************************
5* Copyright (C) 2013-2015, International Business Machines
6* Corporation and others.  All Rights Reserved.
7*******************************************************************************
8* collationsettings.cpp
9*
10* created on: 2013feb07
11* created by: Markus W. Scherer
12*/
13
14#include "unicode/utypes.h"
15
16#if !UCONFIG_NO_COLLATION
17
18#include "unicode/ucol.h"
19#include "cmemory.h"
20#include "collation.h"
21#include "collationdata.h"
22#include "collationsettings.h"
23#include "sharedobject.h"
24#include "uassert.h"
25#include "umutex.h"
26#include "uvectr32.h"
27
28U_NAMESPACE_BEGIN
29
30CollationSettings::CollationSettings(const CollationSettings &other)
31        : SharedObject(other),
32          options(other.options), variableTop(other.variableTop),
33          reorderTable(NULL),
34          minHighNoReorder(other.minHighNoReorder),
35          reorderRanges(NULL), reorderRangesLength(0),
36          reorderCodes(NULL), reorderCodesLength(0), reorderCodesCapacity(0),
37          fastLatinOptions(other.fastLatinOptions) {
38    UErrorCode errorCode = U_ZERO_ERROR;
39    copyReorderingFrom(other, errorCode);
40    if(fastLatinOptions >= 0) {
41        uprv_memcpy(fastLatinPrimaries, other.fastLatinPrimaries, sizeof(fastLatinPrimaries));
42    }
43}
44
45CollationSettings::~CollationSettings() {
46    if(reorderCodesCapacity != 0) {
47        uprv_free(const_cast<int32_t *>(reorderCodes));
48    }
49}
50
51UBool
52CollationSettings::operator==(const CollationSettings &other) const {
53    if(options != other.options) { return FALSE; }
54    if((options & ALTERNATE_MASK) != 0 && variableTop != other.variableTop) { return FALSE; }
55    if(reorderCodesLength != other.reorderCodesLength) { return FALSE; }
56    for(int32_t i = 0; i < reorderCodesLength; ++i) {
57        if(reorderCodes[i] != other.reorderCodes[i]) { return FALSE; }
58    }
59    return TRUE;
60}
61
62int32_t
63CollationSettings::hashCode() const {
64    int32_t h = options << 8;
65    if((options & ALTERNATE_MASK) != 0) { h ^= variableTop; }
66    h ^= reorderCodesLength;
67    for(int32_t i = 0; i < reorderCodesLength; ++i) {
68        h ^= (reorderCodes[i] << i);
69    }
70    return h;
71}
72
73void
74CollationSettings::resetReordering() {
75    // When we turn off reordering, we want to set a NULL permutation
76    // rather than a no-op permutation.
77    // Keep the memory via reorderCodes and its capacity.
78    reorderTable = NULL;
79    minHighNoReorder = 0;
80    reorderRangesLength = 0;
81    reorderCodesLength = 0;
82}
83
84void
85CollationSettings::aliasReordering(const CollationData &data, const int32_t *codes, int32_t length,
86                                   const uint32_t *ranges, int32_t rangesLength,
87                                   const uint8_t *table, UErrorCode &errorCode) {
88    if(U_FAILURE(errorCode)) { return; }
89    if(table != NULL &&
90            (rangesLength == 0 ?
91                    !reorderTableHasSplitBytes(table) :
92                    rangesLength >= 2 &&
93                    // The first offset must be 0. The last offset must not be 0.
94                    (ranges[0] & 0xffff) == 0 && (ranges[rangesLength - 1] & 0xffff) != 0)) {
95        // We need to release the memory before setting the alias pointer.
96        if(reorderCodesCapacity != 0) {
97            uprv_free(const_cast<int32_t *>(reorderCodes));
98            reorderCodesCapacity = 0;
99        }
100        reorderTable = table;
101        reorderCodes = codes;
102        reorderCodesLength = length;
103        // Drop ranges before the first split byte. They are reordered by the table.
104        // This then speeds up reordering of the remaining ranges.
105        int32_t firstSplitByteRangeIndex = 0;
106        while(firstSplitByteRangeIndex < rangesLength &&
107                (ranges[firstSplitByteRangeIndex] & 0xff0000) == 0) {
108            // The second byte of the primary limit is 0.
109            ++firstSplitByteRangeIndex;
110        }
111        if(firstSplitByteRangeIndex == rangesLength) {
112            U_ASSERT(!reorderTableHasSplitBytes(table));
113            minHighNoReorder = 0;
114            reorderRanges = NULL;
115            reorderRangesLength = 0;
116        } else {
117            U_ASSERT(table[ranges[firstSplitByteRangeIndex] >> 24] == 0);
118            minHighNoReorder = ranges[rangesLength - 1] & 0xffff0000;
119            reorderRanges = ranges + firstSplitByteRangeIndex;
120            reorderRangesLength = rangesLength - firstSplitByteRangeIndex;
121        }
122        return;
123    }
124    // Regenerate missing data.
125    setReordering(data, codes, length, errorCode);
126}
127
128void
129CollationSettings::setReordering(const CollationData &data,
130                                 const int32_t *codes, int32_t codesLength,
131                                 UErrorCode &errorCode) {
132    if(U_FAILURE(errorCode)) { return; }
133    if(codesLength == 0 || (codesLength == 1 && codes[0] == UCOL_REORDER_CODE_NONE)) {
134        resetReordering();
135        return;
136    }
137    UVector32 rangesList(errorCode);
138    data.makeReorderRanges(codes, codesLength, rangesList, errorCode);
139    if(U_FAILURE(errorCode)) { return; }
140    int32_t rangesLength = rangesList.size();
141    if(rangesLength == 0) {
142        resetReordering();
143        return;
144    }
145    const uint32_t *ranges = reinterpret_cast<uint32_t *>(rangesList.getBuffer());
146    // ranges[] contains at least two (limit, offset) pairs.
147    // The first offset must be 0. The last offset must not be 0.
148    // Separators (at the low end) and trailing weights (at the high end)
149    // are never reordered.
150    U_ASSERT(rangesLength >= 2);
151    U_ASSERT((ranges[0] & 0xffff) == 0 && (ranges[rangesLength - 1] & 0xffff) != 0);
152    minHighNoReorder = ranges[rangesLength - 1] & 0xffff0000;
153
154    // Write the lead byte permutation table.
155    // Set a 0 for each lead byte that has a range boundary in the middle.
156    uint8_t table[256];
157    int32_t b = 0;
158    int32_t firstSplitByteRangeIndex = -1;
159    for(int32_t i = 0; i < rangesLength; ++i) {
160        uint32_t pair = ranges[i];
161        int32_t limit1 = (int32_t)(pair >> 24);
162        while(b < limit1) {
163            table[b] = (uint8_t)(b + pair);
164            ++b;
165        }
166        // Check the second byte of the limit.
167        if((pair & 0xff0000) != 0) {
168            table[limit1] = 0;
169            b = limit1 + 1;
170            if(firstSplitByteRangeIndex < 0) {
171                firstSplitByteRangeIndex = i;
172            }
173        }
174    }
175    while(b <= 0xff) {
176        table[b] = (uint8_t)b;
177        ++b;
178    }
179    if(firstSplitByteRangeIndex < 0) {
180        // The lead byte permutation table alone suffices for reordering.
181        rangesLength = 0;
182    } else {
183        // Remove the ranges below the first split byte.
184        ranges += firstSplitByteRangeIndex;
185        rangesLength -= firstSplitByteRangeIndex;
186    }
187    setReorderArrays(codes, codesLength, ranges, rangesLength, table, errorCode);
188}
189
190void
191CollationSettings::setReorderArrays(const int32_t *codes, int32_t codesLength,
192                                    const uint32_t *ranges, int32_t rangesLength,
193                                    const uint8_t *table, UErrorCode &errorCode) {
194    if(U_FAILURE(errorCode)) { return; }
195    int32_t *ownedCodes;
196    int32_t totalLength = codesLength + rangesLength;
197    U_ASSERT(totalLength > 0);
198    if(totalLength <= reorderCodesCapacity) {
199        ownedCodes = const_cast<int32_t *>(reorderCodes);
200    } else {
201        // Allocate one memory block for the codes, the ranges, and the 16-aligned table.
202        int32_t capacity = (totalLength + 3) & ~3;  // round up to a multiple of 4 ints
203        ownedCodes = (int32_t *)uprv_malloc(capacity * 4 + 256);
204        if(ownedCodes == NULL) {
205            resetReordering();
206            errorCode = U_MEMORY_ALLOCATION_ERROR;
207            return;
208        }
209        if(reorderCodesCapacity != 0) {
210            uprv_free(const_cast<int32_t *>(reorderCodes));
211        }
212        reorderCodes = ownedCodes;
213        reorderCodesCapacity = capacity;
214    }
215    uprv_memcpy(ownedCodes + reorderCodesCapacity, table, 256);
216    uprv_memcpy(ownedCodes, codes, codesLength * 4);
217    uprv_memcpy(ownedCodes + codesLength, ranges, rangesLength * 4);
218    reorderTable = reinterpret_cast<const uint8_t *>(reorderCodes + reorderCodesCapacity);
219    reorderCodesLength = codesLength;
220    reorderRanges = reinterpret_cast<uint32_t *>(ownedCodes) + codesLength;
221    reorderRangesLength = rangesLength;
222}
223
224void
225CollationSettings::copyReorderingFrom(const CollationSettings &other, UErrorCode &errorCode) {
226    if(U_FAILURE(errorCode)) { return; }
227    if(!other.hasReordering()) {
228        resetReordering();
229        return;
230    }
231    minHighNoReorder = other.minHighNoReorder;
232    if(other.reorderCodesCapacity == 0) {
233        // The reorder arrays are aliased to memory-mapped data.
234        reorderTable = other.reorderTable;
235        reorderRanges = other.reorderRanges;
236        reorderRangesLength = other.reorderRangesLength;
237        reorderCodes = other.reorderCodes;
238        reorderCodesLength = other.reorderCodesLength;
239    } else {
240        setReorderArrays(other.reorderCodes, other.reorderCodesLength,
241                         other.reorderRanges, other.reorderRangesLength,
242                         other.reorderTable, errorCode);
243    }
244}
245
246UBool
247CollationSettings::reorderTableHasSplitBytes(const uint8_t table[256]) {
248    U_ASSERT(table[0] == 0);
249    for(int32_t i = 1; i < 256; ++i) {
250        if(table[i] == 0) {
251            return TRUE;
252        }
253    }
254    return FALSE;
255}
256
257uint32_t
258CollationSettings::reorderEx(uint32_t p) const {
259    if(p >= minHighNoReorder) { return p; }
260    // Round up p so that its lower 16 bits are >= any offset bits.
261    // Then compare q directly with (limit, offset) pairs.
262    uint32_t q = p | 0xffff;
263    uint32_t r;
264    const uint32_t *ranges = reorderRanges;
265    while(q >= (r = *ranges)) { ++ranges; }
266    return p + (r << 24);
267}
268
269void
270CollationSettings::setStrength(int32_t value, int32_t defaultOptions, UErrorCode &errorCode) {
271    if(U_FAILURE(errorCode)) { return; }
272    int32_t noStrength = options & ~STRENGTH_MASK;
273    switch(value) {
274    case UCOL_PRIMARY:
275    case UCOL_SECONDARY:
276    case UCOL_TERTIARY:
277    case UCOL_QUATERNARY:
278    case UCOL_IDENTICAL:
279        options = noStrength | (value << STRENGTH_SHIFT);
280        break;
281    case UCOL_DEFAULT:
282        options = noStrength | (defaultOptions & STRENGTH_MASK);
283        break;
284    default:
285        errorCode = U_ILLEGAL_ARGUMENT_ERROR;
286        break;
287    }
288}
289
290void
291CollationSettings::setFlag(int32_t bit, UColAttributeValue value,
292                           int32_t defaultOptions, UErrorCode &errorCode) {
293    if(U_FAILURE(errorCode)) { return; }
294    switch(value) {
295    case UCOL_ON:
296        options |= bit;
297        break;
298    case UCOL_OFF:
299        options &= ~bit;
300        break;
301    case UCOL_DEFAULT:
302        options = (options & ~bit) | (defaultOptions & bit);
303        break;
304    default:
305        errorCode = U_ILLEGAL_ARGUMENT_ERROR;
306        break;
307    }
308}
309
310void
311CollationSettings::setCaseFirst(UColAttributeValue value,
312                                int32_t defaultOptions, UErrorCode &errorCode) {
313    if(U_FAILURE(errorCode)) { return; }
314    int32_t noCaseFirst = options & ~CASE_FIRST_AND_UPPER_MASK;
315    switch(value) {
316    case UCOL_OFF:
317        options = noCaseFirst;
318        break;
319    case UCOL_LOWER_FIRST:
320        options = noCaseFirst | CASE_FIRST;
321        break;
322    case UCOL_UPPER_FIRST:
323        options = noCaseFirst | CASE_FIRST_AND_UPPER_MASK;
324        break;
325    case UCOL_DEFAULT:
326        options = noCaseFirst | (defaultOptions & CASE_FIRST_AND_UPPER_MASK);
327        break;
328    default:
329        errorCode = U_ILLEGAL_ARGUMENT_ERROR;
330        break;
331    }
332}
333
334void
335CollationSettings::setAlternateHandling(UColAttributeValue value,
336                                        int32_t defaultOptions, UErrorCode &errorCode) {
337    if(U_FAILURE(errorCode)) { return; }
338    int32_t noAlternate = options & ~ALTERNATE_MASK;
339    switch(value) {
340    case UCOL_NON_IGNORABLE:
341        options = noAlternate;
342        break;
343    case UCOL_SHIFTED:
344        options = noAlternate | SHIFTED;
345        break;
346    case UCOL_DEFAULT:
347        options = noAlternate | (defaultOptions & ALTERNATE_MASK);
348        break;
349    default:
350        errorCode = U_ILLEGAL_ARGUMENT_ERROR;
351        break;
352    }
353}
354
355void
356CollationSettings::setMaxVariable(int32_t value, int32_t defaultOptions, UErrorCode &errorCode) {
357    if(U_FAILURE(errorCode)) { return; }
358    int32_t noMax = options & ~MAX_VARIABLE_MASK;
359    switch(value) {
360    case MAX_VAR_SPACE:
361    case MAX_VAR_PUNCT:
362    case MAX_VAR_SYMBOL:
363    case MAX_VAR_CURRENCY:
364        options = noMax | (value << MAX_VARIABLE_SHIFT);
365        break;
366    case UCOL_DEFAULT:
367        options = noMax | (defaultOptions & MAX_VARIABLE_MASK);
368        break;
369    default:
370        errorCode = U_ILLEGAL_ARGUMENT_ERROR;
371        break;
372    }
373}
374
375U_NAMESPACE_END
376
377#endif  // !UCONFIG_NO_COLLATION
378