// © 2016 and later: Unicode, Inc. and others.
// License & terms of use: http://www.unicode.org/copyright.html
/*
*******************************************************************************
* Copyright (C) 2013-2015, International Business Machines
* Corporation and others.  All Rights Reserved.
*******************************************************************************
* collationdatawriter.cpp
*
* created on: 2013aug06
* created by: Markus W. Scherer
*/
13fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius
14fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius#include "unicode/utypes.h"
15fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius
16fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius#if !UCONFIG_NO_COLLATION
17fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius
18fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius#include "unicode/tblcoll.h"
19fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius#include "unicode/udata.h"
20fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius#include "unicode/uniset.h"
21fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius#include "cmemory.h"
22fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius#include "collationdata.h"
23fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius#include "collationdatabuilder.h"
24fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius#include "collationdatareader.h"
25fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius#include "collationdatawriter.h"
26fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius#include "collationfastlatin.h"
27fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius#include "collationsettings.h"
28fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius#include "collationtailoring.h"
29fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius#include "uassert.h"
30fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius#include "ucmndata.h"
31fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius
32fceb39872958b9fa2505e63f8b8699a9e0f882f4ccorneliusU_NAMESPACE_BEGIN
33fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius
34fceb39872958b9fa2505e63f8b8699a9e0f882f4ccorneliusuint8_t *
35fceb39872958b9fa2505e63f8b8699a9e0f882f4ccorneliusRuleBasedCollator::cloneRuleData(int32_t &length, UErrorCode &errorCode) const {
36fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius    if(U_FAILURE(errorCode)) { return NULL; }
37fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius    LocalMemory<uint8_t> buffer((uint8_t *)uprv_malloc(20000));
38fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius    if(buffer.isNull()) {
39fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius        errorCode = U_MEMORY_ALLOCATION_ERROR;
40fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius        return NULL;
41fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius    }
42fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius    length = cloneBinary(buffer.getAlias(), 20000, errorCode);
43fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius    if(errorCode == U_BUFFER_OVERFLOW_ERROR) {
44fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius        if(buffer.allocateInsteadAndCopy(length, 0) == NULL) {
45fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius            errorCode = U_MEMORY_ALLOCATION_ERROR;
46fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius            return NULL;
47fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius        }
48fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius        errorCode = U_ZERO_ERROR;
49fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius        length = cloneBinary(buffer.getAlias(), length, errorCode);
50fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius    }
51fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius    if(U_FAILURE(errorCode)) { return NULL; }
52fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius    return buffer.orphan();
53fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius}
54fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius
55fceb39872958b9fa2505e63f8b8699a9e0f882f4ccorneliusint32_t
56fceb39872958b9fa2505e63f8b8699a9e0f882f4ccorneliusRuleBasedCollator::cloneBinary(uint8_t *dest, int32_t capacity, UErrorCode &errorCode) const {
57fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius    int32_t indexes[CollationDataReader::IX_TOTAL_SIZE + 1];
58fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius    return CollationDataWriter::writeTailoring(
59fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius            *tailoring, *settings, indexes, dest, capacity,
60fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius            errorCode);
61fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius}
62fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius
63fceb39872958b9fa2505e63f8b8699a9e0f882f4ccorneliusstatic const UDataInfo dataInfo = {
64fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius    sizeof(UDataInfo),
65fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius    0,
66fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius
67fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius    U_IS_BIG_ENDIAN,
68fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius    U_CHARSET_FAMILY,
69fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius    U_SIZEOF_UCHAR,
70fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius    0,
71fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius
72fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius    { 0x55, 0x43, 0x6f, 0x6c },         // dataFormat="UCol"
731b7d32f919554dda9c193b32188251337bc756f1Fredrik Roubert    { 5, 0, 0, 0 },                     // formatVersion
74fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius    { 6, 3, 0, 0 }                      // dataVersion
75fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius};
76fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius
77fceb39872958b9fa2505e63f8b8699a9e0f882f4ccorneliusint32_t
78fceb39872958b9fa2505e63f8b8699a9e0f882f4ccorneliusCollationDataWriter::writeBase(const CollationData &data, const CollationSettings &settings,
79fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius                               const void *rootElements, int32_t rootElementsLength,
80fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius                               int32_t indexes[], uint8_t *dest, int32_t capacity,
81fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius                               UErrorCode &errorCode) {
82fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius    return write(TRUE, NULL,
83fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius                 data, settings,
84fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius                 rootElements, rootElementsLength,
85fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius                 indexes, dest, capacity, errorCode);
86fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius}
87fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius
88fceb39872958b9fa2505e63f8b8699a9e0f882f4ccorneliusint32_t
89fceb39872958b9fa2505e63f8b8699a9e0f882f4ccorneliusCollationDataWriter::writeTailoring(const CollationTailoring &t, const CollationSettings &settings,
90fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius                                    int32_t indexes[], uint8_t *dest, int32_t capacity,
91fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius                                    UErrorCode &errorCode) {
92fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius    return write(FALSE, t.version,
93fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius                 *t.data, settings,
94fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius                 NULL, 0,
95fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius                 indexes, dest, capacity, errorCode);
96fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius}
97fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius
98fceb39872958b9fa2505e63f8b8699a9e0f882f4ccorneliusint32_t
99fceb39872958b9fa2505e63f8b8699a9e0f882f4ccorneliusCollationDataWriter::write(UBool isBase, const UVersionInfo dataVersion,
100fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius                           const CollationData &data, const CollationSettings &settings,
101fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius                           const void *rootElements, int32_t rootElementsLength,
102fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius                           int32_t indexes[], uint8_t *dest, int32_t capacity,
103fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius                           UErrorCode &errorCode) {
104fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius    if(U_FAILURE(errorCode)) { return 0; }
105fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius    if(capacity < 0 || (capacity > 0 && dest == NULL)) {
106fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius        errorCode = U_ILLEGAL_ARGUMENT_ERROR;
107fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius        return 0;
108fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius    }
109fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius
110fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius    // Figure out which data items to write before settling on
111fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius    // the indexes length and writing offsets.
112fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius    // For any data item, we need to write the start and limit offsets,
113fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius    // so the indexes length must be at least index-of-start-offset + 2.
114fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius    int32_t indexesLength;
115fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius    UBool hasMappings;
116fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius    UnicodeSet unsafeBackwardSet;
117fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius    const CollationData *baseData = data.base;
118fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius
119fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius    int32_t fastLatinVersion;
120fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius    if(data.fastLatinTable != NULL) {
121fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius        fastLatinVersion = (int32_t)CollationFastLatin::VERSION << 16;
122fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius    } else {
123fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius        fastLatinVersion = 0;
124fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius    }
125fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius    int32_t fastLatinTableLength = 0;
126fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius
127fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius    if(isBase) {
128fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius        // For the root collator, we write an even number of indexes
129fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius        // so that we start with an 8-aligned offset.
130fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius        indexesLength = CollationDataReader::IX_TOTAL_SIZE + 1;
131fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius        U_ASSERT(settings.reorderCodesLength == 0);
132fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius        hasMappings = TRUE;
133fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius        unsafeBackwardSet = *data.unsafeBackwardSet;
134fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius        fastLatinTableLength = data.fastLatinTableLength;
135fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius    } else if(baseData == NULL) {
136fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius        hasMappings = FALSE;
137fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius        if(settings.reorderCodesLength == 0) {
138fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius            // only options
139fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius            indexesLength = CollationDataReader::IX_OPTIONS + 1;  // no limit offset here
140fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius        } else {
141fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius            // only options, reorder codes, and the reorder table
142fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius            indexesLength = CollationDataReader::IX_REORDER_TABLE_OFFSET + 2;
143fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius        }
144fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius    } else {
145fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius        hasMappings = TRUE;
146fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius        // Tailored mappings, and what else?
147fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius        // Check in ascending order of optional tailoring data items.
148fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius        indexesLength = CollationDataReader::IX_CE32S_OFFSET + 2;
149fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius        if(data.contextsLength != 0) {
150fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius            indexesLength = CollationDataReader::IX_CONTEXTS_OFFSET + 2;
151fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius        }
152fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius        unsafeBackwardSet.addAll(*data.unsafeBackwardSet).removeAll(*baseData->unsafeBackwardSet);
153fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius        if(!unsafeBackwardSet.isEmpty()) {
154fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius            indexesLength = CollationDataReader::IX_UNSAFE_BWD_OFFSET + 2;
155fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius        }
156fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius        if(data.fastLatinTable != baseData->fastLatinTable) {
157fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius            fastLatinTableLength = data.fastLatinTableLength;
158fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius            indexesLength = CollationDataReader::IX_FAST_LATIN_TABLE_OFFSET + 2;
159fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius        }
160fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius    }
161fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius
1621b7d32f919554dda9c193b32188251337bc756f1Fredrik Roubert    UVector32 codesAndRanges(errorCode);
1631b7d32f919554dda9c193b32188251337bc756f1Fredrik Roubert    const int32_t *reorderCodes = settings.reorderCodes;
1641b7d32f919554dda9c193b32188251337bc756f1Fredrik Roubert    int32_t reorderCodesLength = settings.reorderCodesLength;
1651b7d32f919554dda9c193b32188251337bc756f1Fredrik Roubert    if(settings.hasReordering() &&
1661b7d32f919554dda9c193b32188251337bc756f1Fredrik Roubert            CollationSettings::reorderTableHasSplitBytes(settings.reorderTable)) {
1671b7d32f919554dda9c193b32188251337bc756f1Fredrik Roubert        // Rebuild the full list of reorder ranges.
1681b7d32f919554dda9c193b32188251337bc756f1Fredrik Roubert        // The list in the settings is truncated for efficiency.
1691b7d32f919554dda9c193b32188251337bc756f1Fredrik Roubert        data.makeReorderRanges(reorderCodes, reorderCodesLength, codesAndRanges, errorCode);
1701b7d32f919554dda9c193b32188251337bc756f1Fredrik Roubert        // Write the codes, then the ranges.
1711b7d32f919554dda9c193b32188251337bc756f1Fredrik Roubert        for(int32_t i = 0; i < reorderCodesLength; ++i) {
1721b7d32f919554dda9c193b32188251337bc756f1Fredrik Roubert            codesAndRanges.insertElementAt(reorderCodes[i], i, errorCode);
1731b7d32f919554dda9c193b32188251337bc756f1Fredrik Roubert        }
1741b7d32f919554dda9c193b32188251337bc756f1Fredrik Roubert        if(U_FAILURE(errorCode)) { return 0; }
1751b7d32f919554dda9c193b32188251337bc756f1Fredrik Roubert        reorderCodes = codesAndRanges.getBuffer();
1761b7d32f919554dda9c193b32188251337bc756f1Fredrik Roubert        reorderCodesLength = codesAndRanges.size();
1771b7d32f919554dda9c193b32188251337bc756f1Fredrik Roubert    }
1781b7d32f919554dda9c193b32188251337bc756f1Fredrik Roubert
179fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius    int32_t headerSize;
180fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius    if(isBase) {
181fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius        headerSize = 0;  // udata_create() writes the header
182fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius    } else {
183fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius        DataHeader header;
184fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius        header.dataHeader.magic1 = 0xda;
185fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius        header.dataHeader.magic2 = 0x27;
186fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius        uprv_memcpy(&header.info, &dataInfo, sizeof(UDataInfo));
187fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius        uprv_memcpy(header.info.dataVersion, dataVersion, sizeof(UVersionInfo));
188fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius        headerSize = (int32_t)sizeof(header);
189fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius        U_ASSERT((headerSize & 3) == 0);  // multiple of 4 bytes
190fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius        if(hasMappings && data.cesLength != 0) {
191fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius            // Sum of the sizes of the data items which are
192fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius            // not automatically multiples of 8 bytes and which are placed before the CEs.
1931b7d32f919554dda9c193b32188251337bc756f1Fredrik Roubert            int32_t sum = headerSize + (indexesLength + reorderCodesLength) * 4;
194fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius            if((sum & 7) != 0) {
195fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius                // We need to add padding somewhere so that the 64-bit CEs are 8-aligned.
196fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius                // We add to the header size here.
197fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius                // Alternatively, we could increment the indexesLength
198fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius                // or add a few bytes to the reorderTable.
199fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius                headerSize += 4;
200fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius            }
201fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius        }
202fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius        header.dataHeader.headerSize = (uint16_t)headerSize;
203fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius        if(headerSize <= capacity) {
204fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius            uprv_memcpy(dest, &header, sizeof(header));
205fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius            // Write 00 bytes so that the padding is not mistaken for a copyright string.
206fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius            uprv_memset(dest + sizeof(header), 0, headerSize - (int32_t)sizeof(header));
207fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius            dest += headerSize;
208fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius            capacity -= headerSize;
209fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius        } else {
210fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius            dest = NULL;
211fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius            capacity = 0;
212fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius        }
213fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius    }
214fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius
215fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius    indexes[CollationDataReader::IX_INDEXES_LENGTH] = indexesLength;
216fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius    U_ASSERT((settings.options & ~0xffff) == 0);
217fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius    indexes[CollationDataReader::IX_OPTIONS] =
218fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius            data.numericPrimary | fastLatinVersion | settings.options;
219fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius    indexes[CollationDataReader::IX_RESERVED2] = 0;
220fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius    indexes[CollationDataReader::IX_RESERVED3] = 0;
221fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius
222fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius    // Byte offsets of data items all start from the start of the indexes.
223fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius    // We add the headerSize at the very end.
224fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius    int32_t totalSize = indexesLength * 4;
225fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius
226fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius    if(hasMappings && (isBase || data.jamoCE32s != baseData->jamoCE32s)) {
227ffdc27edd5503111189fc11165c5a11289a71f79Fredrik Roubert        indexes[CollationDataReader::IX_JAMO_CE32S_START] = static_cast<int32_t>(data.jamoCE32s - data.ce32s);
228fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius    } else {
229fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius        indexes[CollationDataReader::IX_JAMO_CE32S_START] = -1;
230fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius    }
231fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius
232fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius    indexes[CollationDataReader::IX_REORDER_CODES_OFFSET] = totalSize;
2331b7d32f919554dda9c193b32188251337bc756f1Fredrik Roubert    totalSize += reorderCodesLength * 4;
234fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius
235fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius    indexes[CollationDataReader::IX_REORDER_TABLE_OFFSET] = totalSize;
236fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius    if(settings.reorderTable != NULL) {
237fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius        totalSize += 256;
238fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius    }
239fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius
240fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius    indexes[CollationDataReader::IX_TRIE_OFFSET] = totalSize;
241fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius    if(hasMappings) {
242fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius        UErrorCode errorCode2 = U_ZERO_ERROR;
243fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius        int32_t length;
244fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius        if(totalSize < capacity) {
245fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius            length = utrie2_serialize(data.trie, dest + totalSize,
246fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius                                      capacity - totalSize, &errorCode2);
247fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius        } else {
248fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius            length = utrie2_serialize(data.trie, NULL, 0, &errorCode2);
249fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius        }
250fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius        if(U_FAILURE(errorCode2) && errorCode2 != U_BUFFER_OVERFLOW_ERROR) {
251fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius            errorCode = errorCode2;
252fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius            return 0;
253fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius        }
254fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius        // The trie size should be a multiple of 8 bytes due to the way
255fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius        // compactIndex2(UNewTrie2 *trie) currently works.
256fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius        U_ASSERT((length & 7) == 0);
257fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius        totalSize += length;
258fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius    }
259fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius
260fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius    indexes[CollationDataReader::IX_RESERVED8_OFFSET] = totalSize;
261fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius    indexes[CollationDataReader::IX_CES_OFFSET] = totalSize;
262fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius    if(hasMappings && data.cesLength != 0) {
263fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius        U_ASSERT(((headerSize + totalSize) & 7) == 0);
264fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius        totalSize += data.cesLength * 8;
265fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius    }
266fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius
267fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius    indexes[CollationDataReader::IX_RESERVED10_OFFSET] = totalSize;
268fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius    indexes[CollationDataReader::IX_CE32S_OFFSET] = totalSize;
269fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius    if(hasMappings) {
270fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius        totalSize += data.ce32sLength * 4;
271fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius    }
272fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius
273fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius    indexes[CollationDataReader::IX_ROOT_ELEMENTS_OFFSET] = totalSize;
274fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius    totalSize += rootElementsLength * 4;
275fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius
276fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius    indexes[CollationDataReader::IX_CONTEXTS_OFFSET] = totalSize;
277fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius    if(hasMappings) {
278fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius        totalSize += data.contextsLength * 2;
279fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius    }
280fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius
281fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius    indexes[CollationDataReader::IX_UNSAFE_BWD_OFFSET] = totalSize;
282fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius    if(hasMappings && !unsafeBackwardSet.isEmpty()) {
283fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius        UErrorCode errorCode2 = U_ZERO_ERROR;
284fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius        int32_t length;
285fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius        if(totalSize < capacity) {
286fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius            uint16_t *p = reinterpret_cast<uint16_t *>(dest + totalSize);
287fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius            length = unsafeBackwardSet.serialize(
288fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius                    p, (capacity - totalSize) / 2, errorCode2);
289fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius        } else {
290fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius            length = unsafeBackwardSet.serialize(NULL, 0, errorCode2);
291fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius        }
292fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius        if(U_FAILURE(errorCode2) && errorCode2 != U_BUFFER_OVERFLOW_ERROR) {
293fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius            errorCode = errorCode2;
294fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius            return 0;
295fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius        }
296fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius        totalSize += length * 2;
297fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius    }
298fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius
299fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius    indexes[CollationDataReader::IX_FAST_LATIN_TABLE_OFFSET] = totalSize;
300fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius    totalSize += fastLatinTableLength * 2;
301fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius
3021b7d32f919554dda9c193b32188251337bc756f1Fredrik Roubert    UnicodeString scripts;
303fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius    indexes[CollationDataReader::IX_SCRIPTS_OFFSET] = totalSize;
304fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius    if(isBase) {
3051b7d32f919554dda9c193b32188251337bc756f1Fredrik Roubert        scripts.append((UChar)data.numScripts);
3061b7d32f919554dda9c193b32188251337bc756f1Fredrik Roubert        scripts.append(reinterpret_cast<const UChar *>(data.scriptsIndex), data.numScripts + 16);
3071b7d32f919554dda9c193b32188251337bc756f1Fredrik Roubert        scripts.append(reinterpret_cast<const UChar *>(data.scriptStarts), data.scriptStartsLength);
3081b7d32f919554dda9c193b32188251337bc756f1Fredrik Roubert        totalSize += scripts.length() * 2;
309fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius    }
310fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius
311fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius    indexes[CollationDataReader::IX_COMPRESSIBLE_BYTES_OFFSET] = totalSize;
312fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius    if(isBase) {
313fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius        totalSize += 256;
314fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius    }
315fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius
316fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius    indexes[CollationDataReader::IX_RESERVED18_OFFSET] = totalSize;
317fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius    indexes[CollationDataReader::IX_TOTAL_SIZE] = totalSize;
318fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius
319fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius    if(totalSize > capacity) {
320fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius        errorCode = U_BUFFER_OVERFLOW_ERROR;
321fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius        return headerSize + totalSize;
322fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius    }
323fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius
324fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius    uprv_memcpy(dest, indexes, indexesLength * 4);
3251b7d32f919554dda9c193b32188251337bc756f1Fredrik Roubert    copyData(indexes, CollationDataReader::IX_REORDER_CODES_OFFSET, reorderCodes, dest);
326fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius    copyData(indexes, CollationDataReader::IX_REORDER_TABLE_OFFSET, settings.reorderTable, dest);
327fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius    // The trie has already been serialized into the dest buffer.
328fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius    copyData(indexes, CollationDataReader::IX_CES_OFFSET, data.ces, dest);
329fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius    copyData(indexes, CollationDataReader::IX_CE32S_OFFSET, data.ce32s, dest);
330fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius    copyData(indexes, CollationDataReader::IX_ROOT_ELEMENTS_OFFSET, rootElements, dest);
331fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius    copyData(indexes, CollationDataReader::IX_CONTEXTS_OFFSET, data.contexts, dest);
332fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius    // The unsafeBackwardSet has already been serialized into the dest buffer.
333fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius    copyData(indexes, CollationDataReader::IX_FAST_LATIN_TABLE_OFFSET, data.fastLatinTable, dest);
3341b7d32f919554dda9c193b32188251337bc756f1Fredrik Roubert    copyData(indexes, CollationDataReader::IX_SCRIPTS_OFFSET, scripts.getBuffer(), dest);
335fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius    copyData(indexes, CollationDataReader::IX_COMPRESSIBLE_BYTES_OFFSET, data.compressibleBytes, dest);
336fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius
337fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius    return headerSize + totalSize;
338fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius}
339fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius
340fceb39872958b9fa2505e63f8b8699a9e0f882f4ccorneliusvoid
341fceb39872958b9fa2505e63f8b8699a9e0f882f4ccorneliusCollationDataWriter::copyData(const int32_t indexes[], int32_t startIndex,
342fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius                              const void *src, uint8_t *dest) {
343fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius    int32_t start = indexes[startIndex];
344fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius    int32_t limit = indexes[startIndex + 1];
345fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius    if(start < limit) {
346fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius        uprv_memcpy(dest + start, src, limit - start);
347fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius    }
348fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius}
349fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius
350fceb39872958b9fa2505e63f8b8699a9e0f882f4ccorneliusU_NAMESPACE_END
351fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius
352fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius#endif  // !UCONFIG_NO_COLLATION
353