1fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius/*
2fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius*******************************************************************************
31b7d32f919554dda9c193b32188251337bc756f1Fredrik Roubert* Copyright (C) 2013-2015, International Business Machines
4fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius* Corporation and others.  All Rights Reserved.
5fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius*******************************************************************************
6fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius* collationdatareader.cpp
7fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius*
8fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius* created on: 2013feb07
9fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius* created by: Markus W. Scherer
10fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius*/
11fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius
12fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius#include "unicode/utypes.h"
13fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius
14fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius#if !UCONFIG_NO_COLLATION
15fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius
16fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius#include "unicode/ucol.h"
17fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius#include "unicode/udata.h"
18fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius#include "unicode/uscript.h"
19fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius#include "cmemory.h"
20fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius#include "collation.h"
21fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius#include "collationdata.h"
22fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius#include "collationdatareader.h"
23fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius#include "collationfastlatin.h"
24fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius#include "collationkeys.h"
25fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius#include "collationrootelements.h"
26fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius#include "collationsettings.h"
27fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius#include "collationtailoring.h"
28fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius#include "normalizer2impl.h"
29fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius#include "uassert.h"
30fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius#include "ucmndata.h"
31fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius#include "utrie2.h"
32fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius
33fceb39872958b9fa2505e63f8b8699a9e0f882f4ccorneliusU_NAMESPACE_BEGIN
34fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius
35fceb39872958b9fa2505e63f8b8699a9e0f882f4ccorneliusnamespace {
36fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius
/**
 * Bounds-checked read of one slot of an indexes[] array.
 *
 * @param indexes array of int32_t index values
 * @param length  number of valid slots in indexes[]
 * @param i       slot to read
 * @return indexes[i] if 0 <= i < length, otherwise -1
 */
int32_t getIndex(const int32_t *indexes, int32_t length, int32_t i) {
    // Check both bounds: a negative slot must not read memory before the array,
    // and a slot at or beyond length yields the -1 "not present" value.
    return (0 <= i && i < length) ? indexes[i] : -1;
}
40fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius
41fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius}  // namespace
42fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius
43fceb39872958b9fa2505e63f8b8699a9e0f882f4ccorneliusvoid
44fceb39872958b9fa2505e63f8b8699a9e0f882f4ccorneliusCollationDataReader::read(const CollationTailoring *base, const uint8_t *inBytes, int32_t inLength,
45fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius                          CollationTailoring &tailoring, UErrorCode &errorCode) {
46fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius    if(U_FAILURE(errorCode)) { return; }
47fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius    if(base != NULL) {
48fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius        if(inBytes == NULL || (0 <= inLength && inLength < 24)) {
49fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius            errorCode = U_ILLEGAL_ARGUMENT_ERROR;
50fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius            return;
51fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius        }
52fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius        const DataHeader *header = reinterpret_cast<const DataHeader *>(inBytes);
53fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius        if(!(header->dataHeader.magic1 == 0xda && header->dataHeader.magic2 == 0x27 &&
54fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius                isAcceptable(tailoring.version, NULL, NULL, &header->info))) {
55fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius            errorCode = U_INVALID_FORMAT_ERROR;
56fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius            return;
57fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius        }
58fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius        if(base->getUCAVersion() != tailoring.getUCAVersion()) {
59fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius            errorCode = U_COLLATOR_VERSION_MISMATCH;
60fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius            return;
61fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius        }
62fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius        int32_t headerLength = header->dataHeader.headerSize;
63fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius        inBytes += headerLength;
64fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius        if(inLength >= 0) {
65fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius            inLength -= headerLength;
66fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius        }
67fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius    }
68fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius
69fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius    if(inBytes == NULL || (0 <= inLength && inLength < 8)) {
70fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius        errorCode = U_ILLEGAL_ARGUMENT_ERROR;
71fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius        return;
72fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius    }
73fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius    const int32_t *inIndexes = reinterpret_cast<const int32_t *>(inBytes);
74fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius    int32_t indexesLength = inIndexes[IX_INDEXES_LENGTH];
75fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius    if(indexesLength < 2 || (0 <= inLength && inLength < indexesLength * 4)) {
76fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius        errorCode = U_INVALID_FORMAT_ERROR;  // Not enough indexes.
77fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius        return;
78fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius    }
79fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius
80fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius    // Assume that the tailoring data is in initial state,
81fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius    // with NULL pointers and 0 lengths.
82fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius
83fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius    // Set pointers to non-empty data parts.
84fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius    // Do this in order of their byte offsets. (Should help porting to Java.)
85fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius
86fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius    int32_t index;  // one of the indexes[] slots
87fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius    int32_t offset;  // byte offset for the index part
88fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius    int32_t length;  // number of bytes in the index part
89fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius
90fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius    if(indexesLength > IX_TOTAL_SIZE) {
91fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius        length = inIndexes[IX_TOTAL_SIZE];
92fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius    } else if(indexesLength > IX_REORDER_CODES_OFFSET) {
93fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius        length = inIndexes[indexesLength - 1];
94fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius    } else {
95fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius        length = 0;  // only indexes, and inLength was already checked for them
96fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius    }
97fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius    if(0 <= inLength && inLength < length) {
98fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius        errorCode = U_INVALID_FORMAT_ERROR;
99fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius        return;
100fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius    }
101fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius
102fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius    const CollationData *baseData = base == NULL ? NULL : base->data;
103fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius    const int32_t *reorderCodes = NULL;
104fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius    int32_t reorderCodesLength = 0;
1051b7d32f919554dda9c193b32188251337bc756f1Fredrik Roubert    const uint32_t *reorderRanges = NULL;
1061b7d32f919554dda9c193b32188251337bc756f1Fredrik Roubert    int32_t reorderRangesLength = 0;
107fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius    index = IX_REORDER_CODES_OFFSET;
108fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius    offset = getIndex(inIndexes, indexesLength, index);
109fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius    length = getIndex(inIndexes, indexesLength, index + 1) - offset;
110fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius    if(length >= 4) {
111fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius        if(baseData == NULL) {
112fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius            // We assume for collation settings that
113fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius            // the base data does not have a reordering.
114fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius            errorCode = U_INVALID_FORMAT_ERROR;
115fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius            return;
116fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius        }
117fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius        reorderCodes = reinterpret_cast<const int32_t *>(inBytes + offset);
118fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius        reorderCodesLength = length / 4;
1191b7d32f919554dda9c193b32188251337bc756f1Fredrik Roubert
1201b7d32f919554dda9c193b32188251337bc756f1Fredrik Roubert        // The reorderRanges (if any) are the trailing reorderCodes entries.
1211b7d32f919554dda9c193b32188251337bc756f1Fredrik Roubert        // Split the array at the boundary.
1221b7d32f919554dda9c193b32188251337bc756f1Fredrik Roubert        // Script or reorder codes do not exceed 16-bit values.
1231b7d32f919554dda9c193b32188251337bc756f1Fredrik Roubert        // Range limits are stored in the upper 16 bits, and are never 0.
1241b7d32f919554dda9c193b32188251337bc756f1Fredrik Roubert        while(reorderRangesLength < reorderCodesLength &&
1251b7d32f919554dda9c193b32188251337bc756f1Fredrik Roubert                (reorderCodes[reorderCodesLength - reorderRangesLength - 1] & 0xffff0000) != 0) {
1261b7d32f919554dda9c193b32188251337bc756f1Fredrik Roubert            ++reorderRangesLength;
1271b7d32f919554dda9c193b32188251337bc756f1Fredrik Roubert        }
1281b7d32f919554dda9c193b32188251337bc756f1Fredrik Roubert        U_ASSERT(reorderRangesLength < reorderCodesLength);
1291b7d32f919554dda9c193b32188251337bc756f1Fredrik Roubert        if(reorderRangesLength != 0) {
1301b7d32f919554dda9c193b32188251337bc756f1Fredrik Roubert            reorderCodesLength -= reorderRangesLength;
1311b7d32f919554dda9c193b32188251337bc756f1Fredrik Roubert            reorderRanges = reinterpret_cast<const uint32_t *>(reorderCodes + reorderCodesLength);
1321b7d32f919554dda9c193b32188251337bc756f1Fredrik Roubert        }
133fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius    }
134fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius
135fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius    // There should be a reorder table only if there are reorder codes.
136fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius    // However, when there are reorder codes the reorder table may be omitted to reduce
137fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius    // the data size.
138fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius    const uint8_t *reorderTable = NULL;
139fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius    index = IX_REORDER_TABLE_OFFSET;
140fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius    offset = getIndex(inIndexes, indexesLength, index);
141fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius    length = getIndex(inIndexes, indexesLength, index + 1) - offset;
142fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius    if(length >= 256) {
143fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius        if(reorderCodesLength == 0) {
144fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius            errorCode = U_INVALID_FORMAT_ERROR;  // Reordering table without reordering codes.
145fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius            return;
146fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius        }
147fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius        reorderTable = inBytes + offset;
148fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius    } else {
149fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius        // If we have reorder codes, then build the reorderTable at the end,
150fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius        // when the CollationData is otherwise complete.
151fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius    }
152fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius
153fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius    if(baseData != NULL && baseData->numericPrimary != (inIndexes[IX_OPTIONS] & 0xff000000)) {
154fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius        errorCode = U_INVALID_FORMAT_ERROR;
155fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius        return;
156fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius    }
157fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius    CollationData *data = NULL;  // Remains NULL if there are no mappings.
158fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius
159fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius    index = IX_TRIE_OFFSET;
160fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius    offset = getIndex(inIndexes, indexesLength, index);
161fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius    length = getIndex(inIndexes, indexesLength, index + 1) - offset;
162fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius    if(length >= 8) {
163fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius        if(!tailoring.ensureOwnedData(errorCode)) { return; }
164fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius        data = tailoring.ownedData;
165fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius        data->base = baseData;
166fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius        data->numericPrimary = inIndexes[IX_OPTIONS] & 0xff000000;
167fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius        data->trie = tailoring.trie = utrie2_openFromSerialized(
168fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius            UTRIE2_32_VALUE_BITS, inBytes + offset, length, NULL,
169fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius            &errorCode);
170fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius        if(U_FAILURE(errorCode)) { return; }
171fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius    } else if(baseData != NULL) {
172fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius        // Use the base data. Only the settings are tailored.
173fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius        tailoring.data = baseData;
174fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius    } else {
175fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius        errorCode = U_INVALID_FORMAT_ERROR;  // No mappings.
176fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius        return;
177fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius    }
178fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius
179fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius    index = IX_CES_OFFSET;
180fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius    offset = getIndex(inIndexes, indexesLength, index);
181fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius    length = getIndex(inIndexes, indexesLength, index + 1) - offset;
182fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius    if(length >= 8) {
183fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius        if(data == NULL) {
184fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius            errorCode = U_INVALID_FORMAT_ERROR;  // Tailored ces without tailored trie.
185fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius            return;
186fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius        }
187fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius        data->ces = reinterpret_cast<const int64_t *>(inBytes + offset);
188fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius        data->cesLength = length / 8;
189fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius    }
190fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius
191fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius    index = IX_CE32S_OFFSET;
192fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius    offset = getIndex(inIndexes, indexesLength, index);
193fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius    length = getIndex(inIndexes, indexesLength, index + 1) - offset;
194fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius    if(length >= 4) {
195fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius        if(data == NULL) {
196fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius            errorCode = U_INVALID_FORMAT_ERROR;  // Tailored ce32s without tailored trie.
197fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius            return;
198fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius        }
199fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius        data->ce32s = reinterpret_cast<const uint32_t *>(inBytes + offset);
200fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius        data->ce32sLength = length / 4;
201fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius    }
202fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius
203fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius    int32_t jamoCE32sStart = getIndex(inIndexes, indexesLength, IX_JAMO_CE32S_START);
204fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius    if(jamoCE32sStart >= 0) {
205fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius        if(data == NULL || data->ce32s == NULL) {
206fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius            errorCode = U_INVALID_FORMAT_ERROR;  // Index into non-existent ce32s[].
207fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius            return;
208fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius        }
209fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius        data->jamoCE32s = data->ce32s + jamoCE32sStart;
210fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius    } else if(data == NULL) {
211fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius        // Nothing to do.
212fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius    } else if(baseData != NULL) {
213fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius        data->jamoCE32s = baseData->jamoCE32s;
214fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius    } else {
215fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius        errorCode = U_INVALID_FORMAT_ERROR;  // No Jamo CE32s for Hangul processing.
216fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius        return;
217fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius    }
218fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius
219fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius    index = IX_ROOT_ELEMENTS_OFFSET;
220fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius    offset = getIndex(inIndexes, indexesLength, index);
221fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius    length = getIndex(inIndexes, indexesLength, index + 1) - offset;
222fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius    if(length >= 4) {
223fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius        length /= 4;
224fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius        if(data == NULL || length <= CollationRootElements::IX_SEC_TER_BOUNDARIES) {
225fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius            errorCode = U_INVALID_FORMAT_ERROR;
226fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius            return;
227fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius        }
228fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius        data->rootElements = reinterpret_cast<const uint32_t *>(inBytes + offset);
229fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius        data->rootElementsLength = length;
230fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius        uint32_t commonSecTer = data->rootElements[CollationRootElements::IX_COMMON_SEC_AND_TER_CE];
231fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius        if(commonSecTer != Collation::COMMON_SEC_AND_TER_CE) {
232fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius            errorCode = U_INVALID_FORMAT_ERROR;
233fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius            return;
234fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius        }
235fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius        uint32_t secTerBoundaries = data->rootElements[CollationRootElements::IX_SEC_TER_BOUNDARIES];
236fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius        if((secTerBoundaries >> 24) < CollationKeys::SEC_COMMON_HIGH) {
237fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius            // [fixed last secondary common byte] is too low,
238fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius            // and secondary weights would collide with compressed common secondaries.
239fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius            errorCode = U_INVALID_FORMAT_ERROR;
240fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius            return;
241fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius        }
242fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius    }
243fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius
244fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius    index = IX_CONTEXTS_OFFSET;
245fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius    offset = getIndex(inIndexes, indexesLength, index);
246fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius    length = getIndex(inIndexes, indexesLength, index + 1) - offset;
247fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius    if(length >= 2) {
248fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius        if(data == NULL) {
249fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius            errorCode = U_INVALID_FORMAT_ERROR;  // Tailored contexts without tailored trie.
250fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius            return;
251fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius        }
252fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius        data->contexts = reinterpret_cast<const UChar *>(inBytes + offset);
253fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius        data->contextsLength = length / 2;
254fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius    }
255fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius
256fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius    index = IX_UNSAFE_BWD_OFFSET;
257fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius    offset = getIndex(inIndexes, indexesLength, index);
258fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius    length = getIndex(inIndexes, indexesLength, index + 1) - offset;
259fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius    if(length >= 2) {
260fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius        if(data == NULL) {
261fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius            errorCode = U_INVALID_FORMAT_ERROR;
262fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius            return;
263fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius        }
264fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius        if(baseData == NULL) {
265fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius            // Create the unsafe-backward set for the root collator.
266fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius            // Include all non-zero combining marks and trail surrogates.
267fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius            // We do this at load time, rather than at build time,
268fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius            // to simplify Unicode version bootstrapping:
269fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius            // The root data builder only needs the new FractionalUCA.txt data,
270fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius            // but it need not be built with a version of ICU already updated to
271fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius            // the corresponding new Unicode Character Database.
272fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius            //
273fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius            // The following is an optimized version of
274fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius            // new UnicodeSet("[[:^lccc=0:][\\udc00-\\udfff]]").
275fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius            // It is faster and requires fewer code dependencies.
276fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius            tailoring.unsafeBackwardSet = new UnicodeSet(0xdc00, 0xdfff);  // trail surrogates
277fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius            if(tailoring.unsafeBackwardSet == NULL) {
278fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius                errorCode = U_MEMORY_ALLOCATION_ERROR;
279fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius                return;
280fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius            }
281fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius            data->nfcImpl.addLcccChars(*tailoring.unsafeBackwardSet);
282fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius        } else {
283fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius            // Clone the root collator's set contents.
284fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius            tailoring.unsafeBackwardSet = static_cast<UnicodeSet *>(
285fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius                baseData->unsafeBackwardSet->cloneAsThawed());
286fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius            if(tailoring.unsafeBackwardSet == NULL) {
287fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius                errorCode = U_MEMORY_ALLOCATION_ERROR;
288fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius                return;
289fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius            }
290fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius        }
291fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius        // Add the ranges from the data file to the unsafe-backward set.
292fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius        USerializedSet sset;
293fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius        const uint16_t *unsafeData = reinterpret_cast<const uint16_t *>(inBytes + offset);
294fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius        if(!uset_getSerializedSet(&sset, unsafeData, length / 2)) {
295fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius            errorCode = U_INVALID_FORMAT_ERROR;
296fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius            return;
297fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius        }
298fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius        int32_t count = uset_getSerializedRangeCount(&sset);
299fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius        for(int32_t i = 0; i < count; ++i) {
300fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius            UChar32 start, end;
301fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius            uset_getSerializedRange(&sset, i, &start, &end);
302fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius            tailoring.unsafeBackwardSet->add(start, end);
303fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius        }
304fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius        // Mark each lead surrogate as "unsafe"
305fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius        // if any of its 1024 associated supplementary code points is "unsafe".
306fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius        UChar32 c = 0x10000;
307fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius        for(UChar lead = 0xd800; lead < 0xdc00; ++lead, c += 0x400) {
308fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius            if(!tailoring.unsafeBackwardSet->containsNone(c, c + 0x3ff)) {
309fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius                tailoring.unsafeBackwardSet->add(lead);
310fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius            }
311fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius        }
312fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius        tailoring.unsafeBackwardSet->freeze();
313fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius        data->unsafeBackwardSet = tailoring.unsafeBackwardSet;
314fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius    } else if(data == NULL) {
315fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius        // Nothing to do.
316fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius    } else if(baseData != NULL) {
317fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius        // No tailoring-specific data: Alias the root collator's set.
318fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius        data->unsafeBackwardSet = baseData->unsafeBackwardSet;
319fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius    } else {
320fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius        errorCode = U_INVALID_FORMAT_ERROR;  // No unsafeBackwardSet.
321fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius        return;
322fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius    }
323fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius
324fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius    // If the fast Latin format version is different,
325fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius    // or the version is set to 0 for "no fast Latin table",
326fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius    // then just always use the normal string comparison path.
327fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius    if(data != NULL) {
328fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius        data->fastLatinTable = NULL;
329fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius        data->fastLatinTableLength = 0;
330fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius        if(((inIndexes[IX_OPTIONS] >> 16) & 0xff) == CollationFastLatin::VERSION) {
331fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius            index = IX_FAST_LATIN_TABLE_OFFSET;
332fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius            offset = getIndex(inIndexes, indexesLength, index);
333fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius            length = getIndex(inIndexes, indexesLength, index + 1) - offset;
334fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius            if(length >= 2) {
335fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius                data->fastLatinTable = reinterpret_cast<const uint16_t *>(inBytes + offset);
336fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius                data->fastLatinTableLength = length / 2;
337fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius                if((*data->fastLatinTable >> 8) != CollationFastLatin::VERSION) {
338fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius                    errorCode = U_INVALID_FORMAT_ERROR;  // header vs. table version mismatch
339fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius                    return;
340fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius                }
341fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius            } else if(baseData != NULL) {
342fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius                data->fastLatinTable = baseData->fastLatinTable;
343fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius                data->fastLatinTableLength = baseData->fastLatinTableLength;
344fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius            }
345fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius        }
346fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius    }
347fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius
348fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius    index = IX_SCRIPTS_OFFSET;
349fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius    offset = getIndex(inIndexes, indexesLength, index);
350fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius    length = getIndex(inIndexes, indexesLength, index + 1) - offset;
351fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius    if(length >= 2) {
352fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius        if(data == NULL) {
353fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius            errorCode = U_INVALID_FORMAT_ERROR;
354fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius            return;
355fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius        }
3561b7d32f919554dda9c193b32188251337bc756f1Fredrik Roubert        const uint16_t *scripts = reinterpret_cast<const uint16_t *>(inBytes + offset);
3571b7d32f919554dda9c193b32188251337bc756f1Fredrik Roubert        int32_t scriptsLength = length / 2;
3581b7d32f919554dda9c193b32188251337bc756f1Fredrik Roubert        data->numScripts = scripts[0];
3591b7d32f919554dda9c193b32188251337bc756f1Fredrik Roubert        // There must be enough entries for both arrays, including more than two range starts.
3601b7d32f919554dda9c193b32188251337bc756f1Fredrik Roubert        data->scriptStartsLength = scriptsLength - (1 + data->numScripts + 16);
3611b7d32f919554dda9c193b32188251337bc756f1Fredrik Roubert        if(data->scriptStartsLength <= 2 ||
3621b7d32f919554dda9c193b32188251337bc756f1Fredrik Roubert                CollationData::MAX_NUM_SCRIPT_RANGES < data->scriptStartsLength) {
3631b7d32f919554dda9c193b32188251337bc756f1Fredrik Roubert            errorCode = U_INVALID_FORMAT_ERROR;
3641b7d32f919554dda9c193b32188251337bc756f1Fredrik Roubert            return;
3651b7d32f919554dda9c193b32188251337bc756f1Fredrik Roubert        }
3661b7d32f919554dda9c193b32188251337bc756f1Fredrik Roubert        data->scriptsIndex = scripts + 1;
3671b7d32f919554dda9c193b32188251337bc756f1Fredrik Roubert        data->scriptStarts = scripts + 1 + data->numScripts + 16;
3681b7d32f919554dda9c193b32188251337bc756f1Fredrik Roubert        if(!(data->scriptStarts[0] == 0 &&
3691b7d32f919554dda9c193b32188251337bc756f1Fredrik Roubert                data->scriptStarts[1] == ((Collation::MERGE_SEPARATOR_BYTE + 1) << 8) &&
3701b7d32f919554dda9c193b32188251337bc756f1Fredrik Roubert                data->scriptStarts[data->scriptStartsLength - 1] ==
3711b7d32f919554dda9c193b32188251337bc756f1Fredrik Roubert                        (Collation::TRAIL_WEIGHT_BYTE << 8))) {
3721b7d32f919554dda9c193b32188251337bc756f1Fredrik Roubert            errorCode = U_INVALID_FORMAT_ERROR;
3731b7d32f919554dda9c193b32188251337bc756f1Fredrik Roubert            return;
3741b7d32f919554dda9c193b32188251337bc756f1Fredrik Roubert        }
375fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius    } else if(data == NULL) {
376fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius        // Nothing to do.
377fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius    } else if(baseData != NULL) {
3781b7d32f919554dda9c193b32188251337bc756f1Fredrik Roubert        data->numScripts = baseData->numScripts;
3791b7d32f919554dda9c193b32188251337bc756f1Fredrik Roubert        data->scriptsIndex = baseData->scriptsIndex;
3801b7d32f919554dda9c193b32188251337bc756f1Fredrik Roubert        data->scriptStarts = baseData->scriptStarts;
3811b7d32f919554dda9c193b32188251337bc756f1Fredrik Roubert        data->scriptStartsLength = baseData->scriptStartsLength;
382fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius    }
383fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius
384fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius    index = IX_COMPRESSIBLE_BYTES_OFFSET;
385fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius    offset = getIndex(inIndexes, indexesLength, index);
386fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius    length = getIndex(inIndexes, indexesLength, index + 1) - offset;
387fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius    if(length >= 256) {
388fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius        if(data == NULL) {
389fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius            errorCode = U_INVALID_FORMAT_ERROR;
390fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius            return;
391fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius        }
392fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius        data->compressibleBytes = reinterpret_cast<const UBool *>(inBytes + offset);
393fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius    } else if(data == NULL) {
394fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius        // Nothing to do.
395fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius    } else if(baseData != NULL) {
396fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius        data->compressibleBytes = baseData->compressibleBytes;
397fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius    } else {
398fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius        errorCode = U_INVALID_FORMAT_ERROR;  // No compressibleBytes[].
399fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius        return;
400fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius    }
401fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius
402fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius    const CollationSettings &ts = *tailoring.settings;
403fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius    int32_t options = inIndexes[IX_OPTIONS] & 0xffff;
404fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius    uint16_t fastLatinPrimaries[CollationFastLatin::LATIN_LIMIT];
405fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius    int32_t fastLatinOptions = CollationFastLatin::getOptions(
406f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius            tailoring.data, ts, fastLatinPrimaries, UPRV_LENGTHOF(fastLatinPrimaries));
407fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius    if(options == ts.options && ts.variableTop != 0 &&
408fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius            reorderCodesLength == ts.reorderCodesLength &&
409fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius            uprv_memcmp(reorderCodes, ts.reorderCodes, reorderCodesLength * 4) == 0 &&
410fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius            fastLatinOptions == ts.fastLatinOptions &&
411fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius            (fastLatinOptions < 0 ||
412fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius                uprv_memcmp(fastLatinPrimaries, ts.fastLatinPrimaries,
413fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius                            sizeof(fastLatinPrimaries)) == 0)) {
414fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius        return;
415fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius    }
416fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius
417fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius    CollationSettings *settings = SharedObject::copyOnWrite(tailoring.settings);
418fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius    if(settings == NULL) {
419fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius        errorCode = U_MEMORY_ALLOCATION_ERROR;
420fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius        return;
421fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius    }
422fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius    settings->options = options;
423fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius    // Set variableTop from options and scripts data.
424fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius    settings->variableTop = tailoring.data->getLastPrimaryForGroup(
425fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius            UCOL_REORDER_CODE_FIRST + settings->getMaxVariable());
426fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius    if(settings->variableTop == 0) {
427fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius        errorCode = U_INVALID_FORMAT_ERROR;
428fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius        return;
429fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius    }
430fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius
4311b7d32f919554dda9c193b32188251337bc756f1Fredrik Roubert    if(reorderCodesLength != 0) {
4321b7d32f919554dda9c193b32188251337bc756f1Fredrik Roubert        settings->aliasReordering(*baseData, reorderCodes, reorderCodesLength,
4331b7d32f919554dda9c193b32188251337bc756f1Fredrik Roubert                                  reorderRanges, reorderRangesLength,
4341b7d32f919554dda9c193b32188251337bc756f1Fredrik Roubert                                  reorderTable, errorCode);
435fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius    }
436fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius
437fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius    settings->fastLatinOptions = CollationFastLatin::getOptions(
438fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius        tailoring.data, *settings,
439f9878a236aa0d9662d8e40cafdaf2e04cd615835ccornelius        settings->fastLatinPrimaries, UPRV_LENGTHOF(settings->fastLatinPrimaries));
440fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius}
441fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius
442fceb39872958b9fa2505e63f8b8699a9e0f882f4ccorneliusUBool U_CALLCONV
443fceb39872958b9fa2505e63f8b8699a9e0f882f4ccorneliusCollationDataReader::isAcceptable(void *context,
444fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius                                  const char * /* type */, const char * /*name*/,
445fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius                                  const UDataInfo *pInfo) {
446fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius    if(
447fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius        pInfo->size >= 20 &&
448fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius        pInfo->isBigEndian == U_IS_BIG_ENDIAN &&
449fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius        pInfo->charsetFamily == U_CHARSET_FAMILY &&
450fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius        pInfo->dataFormat[0] == 0x55 &&  // dataFormat="UCol"
451fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius        pInfo->dataFormat[1] == 0x43 &&
452fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius        pInfo->dataFormat[2] == 0x6f &&
453fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius        pInfo->dataFormat[3] == 0x6c &&
4541b7d32f919554dda9c193b32188251337bc756f1Fredrik Roubert        pInfo->formatVersion[0] == 5
455fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius    ) {
456fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius        UVersionInfo *version = static_cast<UVersionInfo *>(context);
457fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius        if(version != NULL) {
458fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius            uprv_memcpy(version, pInfo->dataVersion, 4);
459fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius        }
460fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius        return TRUE;
461fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius    } else {
462fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius        return FALSE;
463fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius    }
464fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius}
465fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius
466fceb39872958b9fa2505e63f8b8699a9e0f882f4ccorneliusU_NAMESPACE_END
467fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius
468fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius#endif  // !UCONFIG_NO_COLLATION
469