10596faeddefbf198de137d5e893708495ab1584cFredrik Roubert// © 2016 and later: Unicode, Inc. and others.
264339d36f8bd4db5025fe2988eda22b491a9219cFredrik Roubert// License & terms of use: http://www.unicode.org/copyright.html
3fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius/*
4fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius*******************************************************************************
51b7d32f919554dda9c193b32188251337bc756f1Fredrik Roubert* Copyright (C) 2013-2015, International Business Machines
6fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius* Corporation and others.  All Rights Reserved.
7fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius*******************************************************************************
8fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius* collationdatareader.cpp
9fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius*
10fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius* created on: 2013feb07
11fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius* created by: Markus W. Scherer
12fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius*/
13fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius
14fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius#include "unicode/utypes.h"
15fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius
16fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius#if !UCONFIG_NO_COLLATION
17fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius
18fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius#include "unicode/ucol.h"
19fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius#include "unicode/udata.h"
20fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius#include "unicode/uscript.h"
21fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius#include "cmemory.h"
22fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius#include "collation.h"
23fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius#include "collationdata.h"
24fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius#include "collationdatareader.h"
25fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius#include "collationfastlatin.h"
26fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius#include "collationkeys.h"
27fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius#include "collationrootelements.h"
28fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius#include "collationsettings.h"
29fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius#include "collationtailoring.h"
30c14898b482f76ecab9026615e2e4c6fe78358bdcFredrik Roubert#include "collunsafe.h"
31fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius#include "normalizer2impl.h"
32fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius#include "uassert.h"
33fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius#include "ucmndata.h"
34fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius#include "utrie2.h"
35fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius
36fceb39872958b9fa2505e63f8b8699a9e0f882f4ccorneliusU_NAMESPACE_BEGIN
37fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius
38fceb39872958b9fa2505e63f8b8699a9e0f882f4ccorneliusnamespace {
39fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius
/**
 * Reads one slot of an indexes[] array, tolerating slots that are not present.
 *
 * @param indexes the indexes[] array to read from
 * @param length  number of int32_t entries available in indexes[]
 * @param i       the slot to read
 * @return indexes[i] if 0 <= i < length, otherwise -1
 */
int32_t getIndex(const int32_t *indexes, int32_t length, int32_t i) {
    // Any slot outside [0, length) reads as -1. The lower bound matters too:
    // a negative i must not index in front of the array.
    return (0 <= i && i < length) ? indexes[i] : -1;
}
43fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius
44fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius}  // namespace
45fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius
46fceb39872958b9fa2505e63f8b8699a9e0f882f4ccorneliusvoid
47fceb39872958b9fa2505e63f8b8699a9e0f882f4ccorneliusCollationDataReader::read(const CollationTailoring *base, const uint8_t *inBytes, int32_t inLength,
48fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius                          CollationTailoring &tailoring, UErrorCode &errorCode) {
49fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius    if(U_FAILURE(errorCode)) { return; }
50fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius    if(base != NULL) {
51fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius        if(inBytes == NULL || (0 <= inLength && inLength < 24)) {
52fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius            errorCode = U_ILLEGAL_ARGUMENT_ERROR;
53fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius            return;
54fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius        }
55fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius        const DataHeader *header = reinterpret_cast<const DataHeader *>(inBytes);
56fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius        if(!(header->dataHeader.magic1 == 0xda && header->dataHeader.magic2 == 0x27 &&
57fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius                isAcceptable(tailoring.version, NULL, NULL, &header->info))) {
58fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius            errorCode = U_INVALID_FORMAT_ERROR;
59fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius            return;
60fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius        }
61fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius        if(base->getUCAVersion() != tailoring.getUCAVersion()) {
62fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius            errorCode = U_COLLATOR_VERSION_MISMATCH;
63fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius            return;
64fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius        }
65fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius        int32_t headerLength = header->dataHeader.headerSize;
66fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius        inBytes += headerLength;
67fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius        if(inLength >= 0) {
68fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius            inLength -= headerLength;
69fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius        }
70fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius    }
71fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius
72fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius    if(inBytes == NULL || (0 <= inLength && inLength < 8)) {
73fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius        errorCode = U_ILLEGAL_ARGUMENT_ERROR;
74fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius        return;
75fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius    }
76fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius    const int32_t *inIndexes = reinterpret_cast<const int32_t *>(inBytes);
77fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius    int32_t indexesLength = inIndexes[IX_INDEXES_LENGTH];
78fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius    if(indexesLength < 2 || (0 <= inLength && inLength < indexesLength * 4)) {
79fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius        errorCode = U_INVALID_FORMAT_ERROR;  // Not enough indexes.
80fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius        return;
81fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius    }
82fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius
83fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius    // Assume that the tailoring data is in initial state,
84fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius    // with NULL pointers and 0 lengths.
85fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius
86fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius    // Set pointers to non-empty data parts.
87fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius    // Do this in order of their byte offsets. (Should help porting to Java.)
88fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius
89fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius    int32_t index;  // one of the indexes[] slots
90fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius    int32_t offset;  // byte offset for the index part
91fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius    int32_t length;  // number of bytes in the index part
92fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius
93fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius    if(indexesLength > IX_TOTAL_SIZE) {
94fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius        length = inIndexes[IX_TOTAL_SIZE];
95fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius    } else if(indexesLength > IX_REORDER_CODES_OFFSET) {
96fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius        length = inIndexes[indexesLength - 1];
97fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius    } else {
98fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius        length = 0;  // only indexes, and inLength was already checked for them
99fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius    }
100fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius    if(0 <= inLength && inLength < length) {
101fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius        errorCode = U_INVALID_FORMAT_ERROR;
102fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius        return;
103fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius    }
104fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius
105fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius    const CollationData *baseData = base == NULL ? NULL : base->data;
106fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius    const int32_t *reorderCodes = NULL;
107fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius    int32_t reorderCodesLength = 0;
1081b7d32f919554dda9c193b32188251337bc756f1Fredrik Roubert    const uint32_t *reorderRanges = NULL;
1091b7d32f919554dda9c193b32188251337bc756f1Fredrik Roubert    int32_t reorderRangesLength = 0;
110fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius    index = IX_REORDER_CODES_OFFSET;
111fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius    offset = getIndex(inIndexes, indexesLength, index);
112fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius    length = getIndex(inIndexes, indexesLength, index + 1) - offset;
113fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius    if(length >= 4) {
114fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius        if(baseData == NULL) {
115fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius            // We assume for collation settings that
116fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius            // the base data does not have a reordering.
117fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius            errorCode = U_INVALID_FORMAT_ERROR;
118fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius            return;
119fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius        }
120fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius        reorderCodes = reinterpret_cast<const int32_t *>(inBytes + offset);
121fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius        reorderCodesLength = length / 4;
1221b7d32f919554dda9c193b32188251337bc756f1Fredrik Roubert
1231b7d32f919554dda9c193b32188251337bc756f1Fredrik Roubert        // The reorderRanges (if any) are the trailing reorderCodes entries.
1241b7d32f919554dda9c193b32188251337bc756f1Fredrik Roubert        // Split the array at the boundary.
1251b7d32f919554dda9c193b32188251337bc756f1Fredrik Roubert        // Script or reorder codes do not exceed 16-bit values.
1261b7d32f919554dda9c193b32188251337bc756f1Fredrik Roubert        // Range limits are stored in the upper 16 bits, and are never 0.
1271b7d32f919554dda9c193b32188251337bc756f1Fredrik Roubert        while(reorderRangesLength < reorderCodesLength &&
1281b7d32f919554dda9c193b32188251337bc756f1Fredrik Roubert                (reorderCodes[reorderCodesLength - reorderRangesLength - 1] & 0xffff0000) != 0) {
1291b7d32f919554dda9c193b32188251337bc756f1Fredrik Roubert            ++reorderRangesLength;
1301b7d32f919554dda9c193b32188251337bc756f1Fredrik Roubert        }
1311b7d32f919554dda9c193b32188251337bc756f1Fredrik Roubert        U_ASSERT(reorderRangesLength < reorderCodesLength);
1321b7d32f919554dda9c193b32188251337bc756f1Fredrik Roubert        if(reorderRangesLength != 0) {
1331b7d32f919554dda9c193b32188251337bc756f1Fredrik Roubert            reorderCodesLength -= reorderRangesLength;
1341b7d32f919554dda9c193b32188251337bc756f1Fredrik Roubert            reorderRanges = reinterpret_cast<const uint32_t *>(reorderCodes + reorderCodesLength);
1351b7d32f919554dda9c193b32188251337bc756f1Fredrik Roubert        }
136fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius    }
137fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius
138fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius    // There should be a reorder table only if there are reorder codes.
139fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius    // However, when there are reorder codes the reorder table may be omitted to reduce
140fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius    // the data size.
141fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius    const uint8_t *reorderTable = NULL;
142fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius    index = IX_REORDER_TABLE_OFFSET;
143fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius    offset = getIndex(inIndexes, indexesLength, index);
144fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius    length = getIndex(inIndexes, indexesLength, index + 1) - offset;
145fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius    if(length >= 256) {
146fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius        if(reorderCodesLength == 0) {
147fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius            errorCode = U_INVALID_FORMAT_ERROR;  // Reordering table without reordering codes.
148fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius            return;
149fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius        }
150fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius        reorderTable = inBytes + offset;
151fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius    } else {
152fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius        // If we have reorder codes, then build the reorderTable at the end,
153fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius        // when the CollationData is otherwise complete.
154fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius    }
155fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius
156fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius    if(baseData != NULL && baseData->numericPrimary != (inIndexes[IX_OPTIONS] & 0xff000000)) {
157fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius        errorCode = U_INVALID_FORMAT_ERROR;
158fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius        return;
159fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius    }
160fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius    CollationData *data = NULL;  // Remains NULL if there are no mappings.
161fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius
162fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius    index = IX_TRIE_OFFSET;
163fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius    offset = getIndex(inIndexes, indexesLength, index);
164fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius    length = getIndex(inIndexes, indexesLength, index + 1) - offset;
165fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius    if(length >= 8) {
166fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius        if(!tailoring.ensureOwnedData(errorCode)) { return; }
167fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius        data = tailoring.ownedData;
168fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius        data->base = baseData;
169fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius        data->numericPrimary = inIndexes[IX_OPTIONS] & 0xff000000;
170fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius        data->trie = tailoring.trie = utrie2_openFromSerialized(
171fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius            UTRIE2_32_VALUE_BITS, inBytes + offset, length, NULL,
172fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius            &errorCode);
173fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius        if(U_FAILURE(errorCode)) { return; }
174fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius    } else if(baseData != NULL) {
175fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius        // Use the base data. Only the settings are tailored.
176fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius        tailoring.data = baseData;
177fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius    } else {
178fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius        errorCode = U_INVALID_FORMAT_ERROR;  // No mappings.
179fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius        return;
180fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius    }
181fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius
182fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius    index = IX_CES_OFFSET;
183fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius    offset = getIndex(inIndexes, indexesLength, index);
184fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius    length = getIndex(inIndexes, indexesLength, index + 1) - offset;
185fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius    if(length >= 8) {
186fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius        if(data == NULL) {
187fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius            errorCode = U_INVALID_FORMAT_ERROR;  // Tailored ces without tailored trie.
188fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius            return;
189fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius        }
190fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius        data->ces = reinterpret_cast<const int64_t *>(inBytes + offset);
191fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius        data->cesLength = length / 8;
192fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius    }
193fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius
194fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius    index = IX_CE32S_OFFSET;
195fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius    offset = getIndex(inIndexes, indexesLength, index);
196fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius    length = getIndex(inIndexes, indexesLength, index + 1) - offset;
197fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius    if(length >= 4) {
198fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius        if(data == NULL) {
199fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius            errorCode = U_INVALID_FORMAT_ERROR;  // Tailored ce32s without tailored trie.
200fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius            return;
201fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius        }
202fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius        data->ce32s = reinterpret_cast<const uint32_t *>(inBytes + offset);
203fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius        data->ce32sLength = length / 4;
204fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius    }
205fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius
206fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius    int32_t jamoCE32sStart = getIndex(inIndexes, indexesLength, IX_JAMO_CE32S_START);
207fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius    if(jamoCE32sStart >= 0) {
208fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius        if(data == NULL || data->ce32s == NULL) {
209fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius            errorCode = U_INVALID_FORMAT_ERROR;  // Index into non-existent ce32s[].
210fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius            return;
211fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius        }
212fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius        data->jamoCE32s = data->ce32s + jamoCE32sStart;
213fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius    } else if(data == NULL) {
214fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius        // Nothing to do.
215fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius    } else if(baseData != NULL) {
216fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius        data->jamoCE32s = baseData->jamoCE32s;
217fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius    } else {
218fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius        errorCode = U_INVALID_FORMAT_ERROR;  // No Jamo CE32s for Hangul processing.
219fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius        return;
220fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius    }
221fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius
222fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius    index = IX_ROOT_ELEMENTS_OFFSET;
223fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius    offset = getIndex(inIndexes, indexesLength, index);
224fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius    length = getIndex(inIndexes, indexesLength, index + 1) - offset;
225fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius    if(length >= 4) {
226fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius        length /= 4;
227fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius        if(data == NULL || length <= CollationRootElements::IX_SEC_TER_BOUNDARIES) {
228fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius            errorCode = U_INVALID_FORMAT_ERROR;
229fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius            return;
230fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius        }
231fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius        data->rootElements = reinterpret_cast<const uint32_t *>(inBytes + offset);
232fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius        data->rootElementsLength = length;
233fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius        uint32_t commonSecTer = data->rootElements[CollationRootElements::IX_COMMON_SEC_AND_TER_CE];
234fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius        if(commonSecTer != Collation::COMMON_SEC_AND_TER_CE) {
235fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius            errorCode = U_INVALID_FORMAT_ERROR;
236fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius            return;
237fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius        }
238fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius        uint32_t secTerBoundaries = data->rootElements[CollationRootElements::IX_SEC_TER_BOUNDARIES];
239fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius        if((secTerBoundaries >> 24) < CollationKeys::SEC_COMMON_HIGH) {
240fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius            // [fixed last secondary common byte] is too low,
241fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius            // and secondary weights would collide with compressed common secondaries.
242fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius            errorCode = U_INVALID_FORMAT_ERROR;
243fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius            return;
244fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius        }
245fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius    }
246fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius
247fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius    index = IX_CONTEXTS_OFFSET;
248fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius    offset = getIndex(inIndexes, indexesLength, index);
249fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius    length = getIndex(inIndexes, indexesLength, index + 1) - offset;
250fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius    if(length >= 2) {
251fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius        if(data == NULL) {
252fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius            errorCode = U_INVALID_FORMAT_ERROR;  // Tailored contexts without tailored trie.
253fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius            return;
254fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius        }
255fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius        data->contexts = reinterpret_cast<const UChar *>(inBytes + offset);
256fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius        data->contextsLength = length / 2;
257fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius    }
258fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius
259fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius    index = IX_UNSAFE_BWD_OFFSET;
260fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius    offset = getIndex(inIndexes, indexesLength, index);
261fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius    length = getIndex(inIndexes, indexesLength, index + 1) - offset;
262fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius    if(length >= 2) {
263fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius        if(data == NULL) {
264fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius            errorCode = U_INVALID_FORMAT_ERROR;
265fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius            return;
266fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius        }
267fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius        if(baseData == NULL) {
268c14898b482f76ecab9026615e2e4c6fe78358bdcFredrik Roubert#if defined(COLLUNSAFE_COLL_VERSION) && defined (COLLUNSAFE_SERIALIZE)
269c14898b482f76ecab9026615e2e4c6fe78358bdcFredrik Roubert          tailoring.unsafeBackwardSet = new UnicodeSet(unsafe_serializedData, unsafe_serializedCount, UnicodeSet::kSerialized, errorCode);
270c14898b482f76ecab9026615e2e4c6fe78358bdcFredrik Roubert          if(tailoring.unsafeBackwardSet == NULL) {
271c14898b482f76ecab9026615e2e4c6fe78358bdcFredrik Roubert            errorCode = U_MEMORY_ALLOCATION_ERROR;
272c14898b482f76ecab9026615e2e4c6fe78358bdcFredrik Roubert            return;
273c14898b482f76ecab9026615e2e4c6fe78358bdcFredrik Roubert          } else if (U_FAILURE(errorCode)) {
274c14898b482f76ecab9026615e2e4c6fe78358bdcFredrik Roubert            return;
275c14898b482f76ecab9026615e2e4c6fe78358bdcFredrik Roubert          }
276c14898b482f76ecab9026615e2e4c6fe78358bdcFredrik Roubert#else
277fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius            // Create the unsafe-backward set for the root collator.
278fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius            // Include all non-zero combining marks and trail surrogates.
279fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius            // We do this at load time, rather than at build time,
280fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius            // to simplify Unicode version bootstrapping:
281fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius            // The root data builder only needs the new FractionalUCA.txt data,
282fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius            // but it need not be built with a version of ICU already updated to
283fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius            // the corresponding new Unicode Character Database.
284fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius            //
285fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius            // The following is an optimized version of
286fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius            // new UnicodeSet("[[:^lccc=0:][\\udc00-\\udfff]]").
287fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius            // It is faster and requires fewer code dependencies.
288fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius            tailoring.unsafeBackwardSet = new UnicodeSet(0xdc00, 0xdfff);  // trail surrogates
289fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius            if(tailoring.unsafeBackwardSet == NULL) {
290fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius                errorCode = U_MEMORY_ALLOCATION_ERROR;
291fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius                return;
292fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius            }
293fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius            data->nfcImpl.addLcccChars(*tailoring.unsafeBackwardSet);
294c14898b482f76ecab9026615e2e4c6fe78358bdcFredrik Roubert#endif // !COLLUNSAFE_SERIALIZE || !COLLUNSAFE_COLL_VERSION
295fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius        } else {
296fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius            // Clone the root collator's set contents.
297fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius            tailoring.unsafeBackwardSet = static_cast<UnicodeSet *>(
298fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius                baseData->unsafeBackwardSet->cloneAsThawed());
299fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius            if(tailoring.unsafeBackwardSet == NULL) {
300fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius                errorCode = U_MEMORY_ALLOCATION_ERROR;
301fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius                return;
302fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius            }
303fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius        }
304fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius        // Add the ranges from the data file to the unsafe-backward set.
305fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius        USerializedSet sset;
306fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius        const uint16_t *unsafeData = reinterpret_cast<const uint16_t *>(inBytes + offset);
307fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius        if(!uset_getSerializedSet(&sset, unsafeData, length / 2)) {
308fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius            errorCode = U_INVALID_FORMAT_ERROR;
309fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius            return;
310fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius        }
311fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius        int32_t count = uset_getSerializedRangeCount(&sset);
312fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius        for(int32_t i = 0; i < count; ++i) {
313fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius            UChar32 start, end;
314fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius            uset_getSerializedRange(&sset, i, &start, &end);
315fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius            tailoring.unsafeBackwardSet->add(start, end);
316fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius        }
317fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius        // Mark each lead surrogate as "unsafe"
318fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius        // if any of its 1024 associated supplementary code points is "unsafe".
319fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius        UChar32 c = 0x10000;
320fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius        for(UChar lead = 0xd800; lead < 0xdc00; ++lead, c += 0x400) {
321fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius            if(!tailoring.unsafeBackwardSet->containsNone(c, c + 0x3ff)) {
322fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius                tailoring.unsafeBackwardSet->add(lead);
323fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius            }
324fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius        }
325fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius        tailoring.unsafeBackwardSet->freeze();
326fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius        data->unsafeBackwardSet = tailoring.unsafeBackwardSet;
327fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius    } else if(data == NULL) {
328fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius        // Nothing to do.
329fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius    } else if(baseData != NULL) {
330fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius        // No tailoring-specific data: Alias the root collator's set.
331fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius        data->unsafeBackwardSet = baseData->unsafeBackwardSet;
332fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius    } else {
333fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius        errorCode = U_INVALID_FORMAT_ERROR;  // No unsafeBackwardSet.
334fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius        return;
335fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius    }
336fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius
337fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius    // If the fast Latin format version is different,
338fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius    // or the version is set to 0 for "no fast Latin table",
339fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius    // then just always use the normal string comparison path.
340fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius    if(data != NULL) {
341fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius        data->fastLatinTable = NULL;
342fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius        data->fastLatinTableLength = 0;
343fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius        if(((inIndexes[IX_OPTIONS] >> 16) & 0xff) == CollationFastLatin::VERSION) {
344fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius            index = IX_FAST_LATIN_TABLE_OFFSET;
345fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius            offset = getIndex(inIndexes, indexesLength, index);
346fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius            length = getIndex(inIndexes, indexesLength, index + 1) - offset;
347fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius            if(length >= 2) {
348fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius                data->fastLatinTable = reinterpret_cast<const uint16_t *>(inBytes + offset);
349fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius                data->fastLatinTableLength = length / 2;
350fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius                if((*data->fastLatinTable >> 8) != CollationFastLatin::VERSION) {
351fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius                    errorCode = U_INVALID_FORMAT_ERROR;  // header vs. table version mismatch
352fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius                    return;
353fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius                }
354fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius            } else if(baseData != NULL) {
355fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius                data->fastLatinTable = baseData->fastLatinTable;
356fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius                data->fastLatinTableLength = baseData->fastLatinTableLength;
357fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius            }
358fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius        }
359fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius    }
360fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius
361fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius    index = IX_SCRIPTS_OFFSET;
362fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius    offset = getIndex(inIndexes, indexesLength, index);
363fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius    length = getIndex(inIndexes, indexesLength, index + 1) - offset;
364fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius    if(length >= 2) {
365fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius        if(data == NULL) {
366fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius            errorCode = U_INVALID_FORMAT_ERROR;
367fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius            return;
368fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius        }
3691b7d32f919554dda9c193b32188251337bc756f1Fredrik Roubert        const uint16_t *scripts = reinterpret_cast<const uint16_t *>(inBytes + offset);
3701b7d32f919554dda9c193b32188251337bc756f1Fredrik Roubert        int32_t scriptsLength = length / 2;
3711b7d32f919554dda9c193b32188251337bc756f1Fredrik Roubert        data->numScripts = scripts[0];
3721b7d32f919554dda9c193b32188251337bc756f1Fredrik Roubert        // There must be enough entries for both arrays, including more than two range starts.
3731b7d32f919554dda9c193b32188251337bc756f1Fredrik Roubert        data->scriptStartsLength = scriptsLength - (1 + data->numScripts + 16);
3741b7d32f919554dda9c193b32188251337bc756f1Fredrik Roubert        if(data->scriptStartsLength <= 2 ||
3751b7d32f919554dda9c193b32188251337bc756f1Fredrik Roubert                CollationData::MAX_NUM_SCRIPT_RANGES < data->scriptStartsLength) {
3761b7d32f919554dda9c193b32188251337bc756f1Fredrik Roubert            errorCode = U_INVALID_FORMAT_ERROR;
3771b7d32f919554dda9c193b32188251337bc756f1Fredrik Roubert            return;
3781b7d32f919554dda9c193b32188251337bc756f1Fredrik Roubert        }
3791b7d32f919554dda9c193b32188251337bc756f1Fredrik Roubert        data->scriptsIndex = scripts + 1;
3801b7d32f919554dda9c193b32188251337bc756f1Fredrik Roubert        data->scriptStarts = scripts + 1 + data->numScripts + 16;
3811b7d32f919554dda9c193b32188251337bc756f1Fredrik Roubert        if(!(data->scriptStarts[0] == 0 &&
3821b7d32f919554dda9c193b32188251337bc756f1Fredrik Roubert                data->scriptStarts[1] == ((Collation::MERGE_SEPARATOR_BYTE + 1) << 8) &&
3831b7d32f919554dda9c193b32188251337bc756f1Fredrik Roubert                data->scriptStarts[data->scriptStartsLength - 1] ==
3841b7d32f919554dda9c193b32188251337bc756f1Fredrik Roubert                        (Collation::TRAIL_WEIGHT_BYTE << 8))) {
3851b7d32f919554dda9c193b32188251337bc756f1Fredrik Roubert            errorCode = U_INVALID_FORMAT_ERROR;
3861b7d32f919554dda9c193b32188251337bc756f1Fredrik Roubert            return;
3871b7d32f919554dda9c193b32188251337bc756f1Fredrik Roubert        }
388fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius    } else if(data == NULL) {
389fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius        // Nothing to do.
390fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius    } else if(baseData != NULL) {
3911b7d32f919554dda9c193b32188251337bc756f1Fredrik Roubert        data->numScripts = baseData->numScripts;
3921b7d32f919554dda9c193b32188251337bc756f1Fredrik Roubert        data->scriptsIndex = baseData->scriptsIndex;
3931b7d32f919554dda9c193b32188251337bc756f1Fredrik Roubert        data->scriptStarts = baseData->scriptStarts;
3941b7d32f919554dda9c193b32188251337bc756f1Fredrik Roubert        data->scriptStartsLength = baseData->scriptStartsLength;
395fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius    }
396fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius
397fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius    index = IX_COMPRESSIBLE_BYTES_OFFSET;
398fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius    offset = getIndex(inIndexes, indexesLength, index);
399fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius    length = getIndex(inIndexes, indexesLength, index + 1) - offset;
400fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius    if(length >= 256) {
401fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius        if(data == NULL) {
402fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius            errorCode = U_INVALID_FORMAT_ERROR;
403fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius            return;
404fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius        }
405fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius        data->compressibleBytes = reinterpret_cast<const UBool *>(inBytes + offset);
406fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius    } else if(data == NULL) {
407fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius        // Nothing to do.
408fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius    } else if(baseData != NULL) {
409fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius        data->compressibleBytes = baseData->compressibleBytes;
410fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius    } else {
411fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius        errorCode = U_INVALID_FORMAT_ERROR;  // No compressibleBytes[].
412fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius        return;
413fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius    }
414fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius
415fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius    const CollationSettings &ts = *tailoring.settings;
416fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius    int32_t options = inIndexes[IX_OPTIONS] & 0xffff;
417fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius    uint16_t fastLatinPrimaries[CollationFastLatin::LATIN_LIMIT];
418fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius    int32_t fastLatinOptions = CollationFastLatin::getOptions(
419f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius            tailoring.data, ts, fastLatinPrimaries, UPRV_LENGTHOF(fastLatinPrimaries));
420fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius    if(options == ts.options && ts.variableTop != 0 &&
421fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius            reorderCodesLength == ts.reorderCodesLength &&
422ffdc27edd5503111189fc11165c5a11289a71f79Fredrik Roubert            (reorderCodesLength == 0 ||
423ffdc27edd5503111189fc11165c5a11289a71f79Fredrik Roubert                uprv_memcmp(reorderCodes, ts.reorderCodes, reorderCodesLength * 4) == 0) &&
424fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius            fastLatinOptions == ts.fastLatinOptions &&
425fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius            (fastLatinOptions < 0 ||
426fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius                uprv_memcmp(fastLatinPrimaries, ts.fastLatinPrimaries,
427fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius                            sizeof(fastLatinPrimaries)) == 0)) {
428fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius        return;
429fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius    }
430fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius
431fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius    CollationSettings *settings = SharedObject::copyOnWrite(tailoring.settings);
432fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius    if(settings == NULL) {
433fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius        errorCode = U_MEMORY_ALLOCATION_ERROR;
434fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius        return;
435fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius    }
436fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius    settings->options = options;
437fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius    // Set variableTop from options and scripts data.
438fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius    settings->variableTop = tailoring.data->getLastPrimaryForGroup(
439fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius            UCOL_REORDER_CODE_FIRST + settings->getMaxVariable());
440fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius    if(settings->variableTop == 0) {
441fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius        errorCode = U_INVALID_FORMAT_ERROR;
442fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius        return;
443fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius    }
444fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius
4451b7d32f919554dda9c193b32188251337bc756f1Fredrik Roubert    if(reorderCodesLength != 0) {
4461b7d32f919554dda9c193b32188251337bc756f1Fredrik Roubert        settings->aliasReordering(*baseData, reorderCodes, reorderCodesLength,
4471b7d32f919554dda9c193b32188251337bc756f1Fredrik Roubert                                  reorderRanges, reorderRangesLength,
4481b7d32f919554dda9c193b32188251337bc756f1Fredrik Roubert                                  reorderTable, errorCode);
449fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius    }
450fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius
451fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius    settings->fastLatinOptions = CollationFastLatin::getOptions(
452fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius        tailoring.data, *settings,
453f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius        settings->fastLatinPrimaries, UPRV_LENGTHOF(settings->fastLatinPrimaries));
454fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius}
455fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius
456fceb39872958b9fa2505e63f8b8699a9e0f882f4ccorneliusUBool U_CALLCONV
457fceb39872958b9fa2505e63f8b8699a9e0f882f4ccorneliusCollationDataReader::isAcceptable(void *context,
458fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius                                  const char * /* type */, const char * /*name*/,
459fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius                                  const UDataInfo *pInfo) {
460fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius    if(
461fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius        pInfo->size >= 20 &&
462fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius        pInfo->isBigEndian == U_IS_BIG_ENDIAN &&
463fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius        pInfo->charsetFamily == U_CHARSET_FAMILY &&
464fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius        pInfo->dataFormat[0] == 0x55 &&  // dataFormat="UCol"
465fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius        pInfo->dataFormat[1] == 0x43 &&
466fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius        pInfo->dataFormat[2] == 0x6f &&
467fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius        pInfo->dataFormat[3] == 0x6c &&
4681b7d32f919554dda9c193b32188251337bc756f1Fredrik Roubert        pInfo->formatVersion[0] == 5
469fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius    ) {
470fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius        UVersionInfo *version = static_cast<UVersionInfo *>(context);
471fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius        if(version != NULL) {
472fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius            uprv_memcpy(version, pInfo->dataVersion, 4);
473fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius        }
474fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius        return TRUE;
475fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius    } else {
476fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius        return FALSE;
477fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius    }
478fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius}
479fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius
480fceb39872958b9fa2505e63f8b8699a9e0f882f4ccorneliusU_NAMESPACE_END
481fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius
482fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius#endif  // !UCONFIG_NO_COLLATION
483