10596faeddefbf198de137d5e893708495ab1584cFredrik Roubert// © 2016 and later: Unicode, Inc. and others. 264339d36f8bd4db5025fe2988eda22b491a9219cFredrik Roubert// License & terms of use: http://www.unicode.org/copyright.html 3fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius/* 4fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius******************************************************************************* 51b7d32f919554dda9c193b32188251337bc756f1Fredrik Roubert* Copyright (C) 2013-2015, International Business Machines 6fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius* Corporation and others. All Rights Reserved. 7fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius******************************************************************************* 8fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius* collationdatawriter.cpp 9fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius* 10fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius* created on: 2013aug06 11fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius* created by: Markus W. Scherer 12fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius*/ 13fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius 14fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius#include "unicode/utypes.h" 15fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius 16fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius#if !UCONFIG_NO_COLLATION 17fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius 18fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius#include "unicode/tblcoll.h" 19fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius#include "unicode/udata.h" 20fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius#include "unicode/uniset.h" 21fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius#include "cmemory.h" 22fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius#include "collationdata.h" 23fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius#include "collationdatabuilder.h" 24fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius#include "collationdatareader.h" 25fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius#include "collationdatawriter.h" 26fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius#include "collationfastlatin.h" 27fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius#include "collationsettings.h" 28fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius#include "collationtailoring.h" 29fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius#include "uassert.h" 30fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius#include "ucmndata.h" 31fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius 32fceb39872958b9fa2505e63f8b8699a9e0f882f4ccorneliusU_NAMESPACE_BEGIN 33fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius 34fceb39872958b9fa2505e63f8b8699a9e0f882f4ccorneliusuint8_t * 35fceb39872958b9fa2505e63f8b8699a9e0f882f4ccorneliusRuleBasedCollator::cloneRuleData(int32_t &length, UErrorCode &errorCode) const { 36fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius if(U_FAILURE(errorCode)) { return NULL; } 37fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius LocalMemory<uint8_t> buffer((uint8_t *)uprv_malloc(20000)); 38fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius if(buffer.isNull()) { 39fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius errorCode = U_MEMORY_ALLOCATION_ERROR; 40fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius return NULL; 41fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius } 42fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius length = cloneBinary(buffer.getAlias(), 20000, errorCode); 43fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius if(errorCode == U_BUFFER_OVERFLOW_ERROR) { 44fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius if(buffer.allocateInsteadAndCopy(length, 0) == NULL) { 45fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius errorCode = U_MEMORY_ALLOCATION_ERROR; 46fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius return NULL; 47fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius } 48fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius errorCode = U_ZERO_ERROR; 49fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius length = cloneBinary(buffer.getAlias(), length, errorCode); 50fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius } 51fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius if(U_FAILURE(errorCode)) { return NULL; } 52fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius return buffer.orphan(); 53fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius} 54fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius 55fceb39872958b9fa2505e63f8b8699a9e0f882f4ccorneliusint32_t 56fceb39872958b9fa2505e63f8b8699a9e0f882f4ccorneliusRuleBasedCollator::cloneBinary(uint8_t *dest, int32_t capacity, UErrorCode &errorCode) const { 57fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius int32_t indexes[CollationDataReader::IX_TOTAL_SIZE + 1]; 58fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius return CollationDataWriter::writeTailoring( 59fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius *tailoring, *settings, indexes, dest, capacity, 60fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius errorCode); 61fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius} 62fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius 63fceb39872958b9fa2505e63f8b8699a9e0f882f4ccorneliusstatic const UDataInfo dataInfo = { 64fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius sizeof(UDataInfo), 65fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius 0, 66fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius 67fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius U_IS_BIG_ENDIAN, 68fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius U_CHARSET_FAMILY, 69fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius U_SIZEOF_UCHAR, 70fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius 0, 71fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius 72fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius { 0x55, 0x43, 0x6f, 0x6c }, // dataFormat="UCol" 731b7d32f919554dda9c193b32188251337bc756f1Fredrik Roubert { 5, 0, 0, 0 }, // formatVersion 74fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius { 6, 3, 0, 0 } // dataVersion 75fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius}; 76fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius 77fceb39872958b9fa2505e63f8b8699a9e0f882f4ccorneliusint32_t 78fceb39872958b9fa2505e63f8b8699a9e0f882f4ccorneliusCollationDataWriter::writeBase(const CollationData &data, const CollationSettings &settings, 79fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius const void *rootElements, int32_t rootElementsLength, 80fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius int32_t indexes[], uint8_t *dest, int32_t capacity, 81fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius UErrorCode &errorCode) { 82fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius return write(TRUE, NULL, 83fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius data, settings, 84fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius rootElements, rootElementsLength, 85fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius indexes, dest, capacity, errorCode); 86fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius} 87fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius 88fceb39872958b9fa2505e63f8b8699a9e0f882f4ccorneliusint32_t 89fceb39872958b9fa2505e63f8b8699a9e0f882f4ccorneliusCollationDataWriter::writeTailoring(const CollationTailoring &t, const CollationSettings &settings, 90fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius int32_t indexes[], uint8_t *dest, int32_t capacity, 91fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius UErrorCode &errorCode) { 92fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius return write(FALSE, t.version, 93fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius *t.data, settings, 94fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius NULL, 0, 95fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius indexes, dest, capacity, errorCode); 96fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius} 97fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius 98fceb39872958b9fa2505e63f8b8699a9e0f882f4ccorneliusint32_t 99fceb39872958b9fa2505e63f8b8699a9e0f882f4ccorneliusCollationDataWriter::write(UBool isBase, const UVersionInfo dataVersion, 100fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius const CollationData &data, const CollationSettings &settings, 101fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius const void *rootElements, int32_t rootElementsLength, 102fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius int32_t indexes[], uint8_t *dest, int32_t capacity, 103fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius UErrorCode &errorCode) { 104fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius if(U_FAILURE(errorCode)) { return 0; } 105fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius if(capacity < 0 || (capacity > 0 && dest == NULL)) { 106fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius errorCode = U_ILLEGAL_ARGUMENT_ERROR; 107fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius return 0; 108fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius } 109fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius 110fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius // Figure out which data items to write before settling on 111fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius // the indexes length and writing offsets. 112fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius // For any data item, we need to write the start and limit offsets, 113fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius // so the indexes length must be at least index-of-start-offset + 2. 114fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius int32_t indexesLength; 115fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius UBool hasMappings; 116fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius UnicodeSet unsafeBackwardSet; 117fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius const CollationData *baseData = data.base; 118fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius 119fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius int32_t fastLatinVersion; 120fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius if(data.fastLatinTable != NULL) { 121fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius fastLatinVersion = (int32_t)CollationFastLatin::VERSION << 16; 122fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius } else { 123fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius fastLatinVersion = 0; 124fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius } 125fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius int32_t fastLatinTableLength = 0; 126fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius 127fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius if(isBase) { 128fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius // For the root collator, we write an even number of indexes 129fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius // so that we start with an 8-aligned offset. 130fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius indexesLength = CollationDataReader::IX_TOTAL_SIZE + 1; 131fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius U_ASSERT(settings.reorderCodesLength == 0); 132fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius hasMappings = TRUE; 133fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius unsafeBackwardSet = *data.unsafeBackwardSet; 134fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius fastLatinTableLength = data.fastLatinTableLength; 135fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius } else if(baseData == NULL) { 136fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius hasMappings = FALSE; 137fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius if(settings.reorderCodesLength == 0) { 138fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius // only options 139fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius indexesLength = CollationDataReader::IX_OPTIONS + 1; // no limit offset here 140fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius } else { 141fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius // only options, reorder codes, and the reorder table 142fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius indexesLength = CollationDataReader::IX_REORDER_TABLE_OFFSET + 2; 143fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius } 144fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius } else { 145fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius hasMappings = TRUE; 146fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius // Tailored mappings, and what else? 147fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius // Check in ascending order of optional tailoring data items. 148fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius indexesLength = CollationDataReader::IX_CE32S_OFFSET + 2; 149fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius if(data.contextsLength != 0) { 150fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius indexesLength = CollationDataReader::IX_CONTEXTS_OFFSET + 2; 151fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius } 152fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius unsafeBackwardSet.addAll(*data.unsafeBackwardSet).removeAll(*baseData->unsafeBackwardSet); 153fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius if(!unsafeBackwardSet.isEmpty()) { 154fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius indexesLength = CollationDataReader::IX_UNSAFE_BWD_OFFSET + 2; 155fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius } 156fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius if(data.fastLatinTable != baseData->fastLatinTable) { 157fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius fastLatinTableLength = data.fastLatinTableLength; 158fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius indexesLength = CollationDataReader::IX_FAST_LATIN_TABLE_OFFSET + 2; 159fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius } 160fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius } 161fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius 1621b7d32f919554dda9c193b32188251337bc756f1Fredrik Roubert UVector32 codesAndRanges(errorCode); 1631b7d32f919554dda9c193b32188251337bc756f1Fredrik Roubert const int32_t *reorderCodes = settings.reorderCodes; 1641b7d32f919554dda9c193b32188251337bc756f1Fredrik Roubert int32_t reorderCodesLength = settings.reorderCodesLength; 1651b7d32f919554dda9c193b32188251337bc756f1Fredrik Roubert if(settings.hasReordering() && 1661b7d32f919554dda9c193b32188251337bc756f1Fredrik Roubert CollationSettings::reorderTableHasSplitBytes(settings.reorderTable)) { 1671b7d32f919554dda9c193b32188251337bc756f1Fredrik Roubert // Rebuild the full list of reorder ranges. 1681b7d32f919554dda9c193b32188251337bc756f1Fredrik Roubert // The list in the settings is truncated for efficiency. 1691b7d32f919554dda9c193b32188251337bc756f1Fredrik Roubert data.makeReorderRanges(reorderCodes, reorderCodesLength, codesAndRanges, errorCode); 1701b7d32f919554dda9c193b32188251337bc756f1Fredrik Roubert // Write the codes, then the ranges. 1711b7d32f919554dda9c193b32188251337bc756f1Fredrik Roubert for(int32_t i = 0; i < reorderCodesLength; ++i) { 1721b7d32f919554dda9c193b32188251337bc756f1Fredrik Roubert codesAndRanges.insertElementAt(reorderCodes[i], i, errorCode); 1731b7d32f919554dda9c193b32188251337bc756f1Fredrik Roubert } 1741b7d32f919554dda9c193b32188251337bc756f1Fredrik Roubert if(U_FAILURE(errorCode)) { return 0; } 1751b7d32f919554dda9c193b32188251337bc756f1Fredrik Roubert reorderCodes = codesAndRanges.getBuffer(); 1761b7d32f919554dda9c193b32188251337bc756f1Fredrik Roubert reorderCodesLength = codesAndRanges.size(); 1771b7d32f919554dda9c193b32188251337bc756f1Fredrik Roubert } 1781b7d32f919554dda9c193b32188251337bc756f1Fredrik Roubert 179fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius int32_t headerSize; 180fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius if(isBase) { 181fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius headerSize = 0; // udata_create() writes the header 182fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius } else { 183fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius DataHeader header; 184fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius header.dataHeader.magic1 = 0xda; 185fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius header.dataHeader.magic2 = 0x27; 186fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius uprv_memcpy(&header.info, &dataInfo, sizeof(UDataInfo)); 187fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius uprv_memcpy(header.info.dataVersion, dataVersion, sizeof(UVersionInfo)); 188fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius headerSize = (int32_t)sizeof(header); 189fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius U_ASSERT((headerSize & 3) == 0); // multiple of 4 bytes 190fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius if(hasMappings && data.cesLength != 0) { 191fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius // Sum of the sizes of the data items which are 192fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius // not automatically multiples of 8 bytes and which are placed before the CEs. 1931b7d32f919554dda9c193b32188251337bc756f1Fredrik Roubert int32_t sum = headerSize + (indexesLength + reorderCodesLength) * 4; 194fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius if((sum & 7) != 0) { 195fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius // We need to add padding somewhere so that the 64-bit CEs are 8-aligned. 196fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius // We add to the header size here. 197fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius // Alternatively, we could increment the indexesLength 198fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius // or add a few bytes to the reorderTable. 199fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius headerSize += 4; 200fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius } 201fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius } 202fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius header.dataHeader.headerSize = (uint16_t)headerSize; 203fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius if(headerSize <= capacity) { 204fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius uprv_memcpy(dest, &header, sizeof(header)); 205fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius // Write 00 bytes so that the padding is not mistaken for a copyright string. 206fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius uprv_memset(dest + sizeof(header), 0, headerSize - (int32_t)sizeof(header)); 207fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius dest += headerSize; 208fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius capacity -= headerSize; 209fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius } else { 210fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius dest = NULL; 211fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius capacity = 0; 212fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius } 213fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius } 214fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius 215fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius indexes[CollationDataReader::IX_INDEXES_LENGTH] = indexesLength; 216fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius U_ASSERT((settings.options & ~0xffff) == 0); 217fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius indexes[CollationDataReader::IX_OPTIONS] = 218fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius data.numericPrimary | fastLatinVersion | settings.options; 219fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius indexes[CollationDataReader::IX_RESERVED2] = 0; 220fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius indexes[CollationDataReader::IX_RESERVED3] = 0; 221fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius 222fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius // Byte offsets of data items all start from the start of the indexes. 223fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius // We add the headerSize at the very end. 224fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius int32_t totalSize = indexesLength * 4; 225fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius 226fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius if(hasMappings && (isBase || data.jamoCE32s != baseData->jamoCE32s)) { 227ffdc27edd5503111189fc11165c5a11289a71f79Fredrik Roubert indexes[CollationDataReader::IX_JAMO_CE32S_START] = static_cast<int32_t>(data.jamoCE32s - data.ce32s); 228fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius } else { 229fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius indexes[CollationDataReader::IX_JAMO_CE32S_START] = -1; 230fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius } 231fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius 232fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius indexes[CollationDataReader::IX_REORDER_CODES_OFFSET] = totalSize; 2331b7d32f919554dda9c193b32188251337bc756f1Fredrik Roubert totalSize += reorderCodesLength * 4; 234fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius 235fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius indexes[CollationDataReader::IX_REORDER_TABLE_OFFSET] = totalSize; 236fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius if(settings.reorderTable != NULL) { 237fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius totalSize += 256; 238fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius } 239fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius 240fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius indexes[CollationDataReader::IX_TRIE_OFFSET] = totalSize; 241fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius if(hasMappings) { 242fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius UErrorCode errorCode2 = U_ZERO_ERROR; 243fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius int32_t length; 244fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius if(totalSize < capacity) { 245fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius length = utrie2_serialize(data.trie, dest + totalSize, 246fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius capacity - totalSize, &errorCode2); 247fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius } else { 248fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius length = utrie2_serialize(data.trie, NULL, 0, &errorCode2); 249fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius } 250fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius if(U_FAILURE(errorCode2) && errorCode2 != U_BUFFER_OVERFLOW_ERROR) { 251fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius errorCode = errorCode2; 252fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius return 0; 253fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius } 254fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius // The trie size should be a multiple of 8 bytes due to the way 255fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius // compactIndex2(UNewTrie2 *trie) currently works. 256fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius U_ASSERT((length & 7) == 0); 257fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius totalSize += length; 258fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius } 259fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius 260fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius indexes[CollationDataReader::IX_RESERVED8_OFFSET] = totalSize; 261fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius indexes[CollationDataReader::IX_CES_OFFSET] = totalSize; 262fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius if(hasMappings && data.cesLength != 0) { 263fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius U_ASSERT(((headerSize + totalSize) & 7) == 0); 264fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius totalSize += data.cesLength * 8; 265fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius } 266fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius 267fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius indexes[CollationDataReader::IX_RESERVED10_OFFSET] = totalSize; 268fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius indexes[CollationDataReader::IX_CE32S_OFFSET] = totalSize; 269fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius if(hasMappings) { 270fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius totalSize += data.ce32sLength * 4; 271fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius } 272fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius 273fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius indexes[CollationDataReader::IX_ROOT_ELEMENTS_OFFSET] = totalSize; 274fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius totalSize += rootElementsLength * 4; 275fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius 276fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius indexes[CollationDataReader::IX_CONTEXTS_OFFSET] = totalSize; 277fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius if(hasMappings) { 278fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius totalSize += data.contextsLength * 2; 279fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius } 280fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius 281fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius indexes[CollationDataReader::IX_UNSAFE_BWD_OFFSET] = totalSize; 282fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius if(hasMappings && !unsafeBackwardSet.isEmpty()) { 283fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius UErrorCode errorCode2 = U_ZERO_ERROR; 284fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius int32_t length; 285fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius if(totalSize < capacity) { 286fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius uint16_t *p = reinterpret_cast<uint16_t *>(dest + totalSize); 287fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius length = unsafeBackwardSet.serialize( 288fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius p, (capacity - totalSize) / 2, errorCode2); 289fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius } else { 290fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius length = unsafeBackwardSet.serialize(NULL, 0, errorCode2); 291fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius } 292fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius if(U_FAILURE(errorCode2) && errorCode2 != U_BUFFER_OVERFLOW_ERROR) { 293fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius errorCode = errorCode2; 294fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius return 0; 295fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius } 296fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius totalSize += length * 2; 297fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius } 298fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius 299fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius indexes[CollationDataReader::IX_FAST_LATIN_TABLE_OFFSET] = totalSize; 300fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius totalSize += fastLatinTableLength * 2; 301fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius 3021b7d32f919554dda9c193b32188251337bc756f1Fredrik Roubert UnicodeString scripts; 303fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius indexes[CollationDataReader::IX_SCRIPTS_OFFSET] = totalSize; 304fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius if(isBase) { 3051b7d32f919554dda9c193b32188251337bc756f1Fredrik Roubert scripts.append((UChar)data.numScripts); 3061b7d32f919554dda9c193b32188251337bc756f1Fredrik Roubert scripts.append(reinterpret_cast<const UChar *>(data.scriptsIndex), data.numScripts + 16); 3071b7d32f919554dda9c193b32188251337bc756f1Fredrik Roubert scripts.append(reinterpret_cast<const UChar *>(data.scriptStarts), data.scriptStartsLength); 3081b7d32f919554dda9c193b32188251337bc756f1Fredrik Roubert totalSize += scripts.length() * 2; 309fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius } 310fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius 311fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius indexes[CollationDataReader::IX_COMPRESSIBLE_BYTES_OFFSET] = totalSize; 312fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius if(isBase) { 313fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius totalSize += 256; 314fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius } 315fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius 316fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius indexes[CollationDataReader::IX_RESERVED18_OFFSET] = totalSize; 317fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius indexes[CollationDataReader::IX_TOTAL_SIZE] = totalSize; 318fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius 319fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius if(totalSize > capacity) { 320fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius errorCode = U_BUFFER_OVERFLOW_ERROR; 321fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius return headerSize + totalSize; 322fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius } 323fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius 324fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius uprv_memcpy(dest, indexes, indexesLength * 4); 3251b7d32f919554dda9c193b32188251337bc756f1Fredrik Roubert copyData(indexes, CollationDataReader::IX_REORDER_CODES_OFFSET, reorderCodes, dest); 326fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius copyData(indexes, CollationDataReader::IX_REORDER_TABLE_OFFSET, settings.reorderTable, dest); 327fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius // The trie has already been serialized into the dest buffer. 328fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius copyData(indexes, CollationDataReader::IX_CES_OFFSET, data.ces, dest); 329fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius copyData(indexes, CollationDataReader::IX_CE32S_OFFSET, data.ce32s, dest); 330fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius copyData(indexes, CollationDataReader::IX_ROOT_ELEMENTS_OFFSET, rootElements, dest); 331fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius copyData(indexes, CollationDataReader::IX_CONTEXTS_OFFSET, data.contexts, dest); 332fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius // The unsafeBackwardSet has already been serialized into the dest buffer. 333fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius copyData(indexes, CollationDataReader::IX_FAST_LATIN_TABLE_OFFSET, data.fastLatinTable, dest); 3341b7d32f919554dda9c193b32188251337bc756f1Fredrik Roubert copyData(indexes, CollationDataReader::IX_SCRIPTS_OFFSET, scripts.getBuffer(), dest); 335fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius copyData(indexes, CollationDataReader::IX_COMPRESSIBLE_BYTES_OFFSET, data.compressibleBytes, dest); 336fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius 337fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius return headerSize + totalSize; 338fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius} 339fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius 340fceb39872958b9fa2505e63f8b8699a9e0f882f4ccorneliusvoid 341fceb39872958b9fa2505e63f8b8699a9e0f882f4ccorneliusCollationDataWriter::copyData(const int32_t indexes[], int32_t startIndex, 342fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius const void *src, uint8_t *dest) { 343fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius int32_t start = indexes[startIndex]; 344fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius int32_t limit = indexes[startIndex + 1]; 345fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius if(start < limit) { 346fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius uprv_memcpy(dest + start, src, limit - start); 347fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius } 348fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius} 349fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius 350fceb39872958b9fa2505e63f8b8699a9e0f882f4ccorneliusU_NAMESPACE_END 351fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius 352fceb39872958b9fa2505e63f8b8699a9e0f882f4ccornelius#endif // !UCONFIG_NO_COLLATION 353