16f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org/* 26f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org******************************************************************************* 36f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org* 46f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org* Copyright (C) 1999-2012, International Business Machines 56f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org* Corporation and others. All Rights Reserved. 66f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org* 76f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org******************************************************************************* 86f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org* file name: store.c 96f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org* encoding: US-ASCII 106f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org* tab size: 8 (not used) 116f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org* indentation:4 126f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org* 136f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org* created on: 2003-02-06 146f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org* created by: Ram Viswanadha 156f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org* 166f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org*/ 176f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org 186f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org#include <stdio.h> 196f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org#include <stdlib.h> 206f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org#include "unicode/utypes.h" 216f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org#include "cmemory.h" 226f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org#include "cstring.h" 236f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org#include "filestrm.h" 246f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org#include "unicode/udata.h" 
256f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org#include "unicode/utf16.h" 266f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org#include "utrie.h" 276f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org#include "unewdata.h" 286f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org#include "gensprep.h" 296f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org#include "uhash.h" 306f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org 316f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org 326f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org#define DO_DEBUG_OUT 0 336f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org 346f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org 356f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org/* 366f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org * StringPrep profile file format ------------------------------------ 376f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org * 386f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org * The file format prepared and written here contains a 16-bit trie and a mapping table. 396f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org * 406f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org * Before the data contents described below, there are the headers required by 416f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org * the udata API for loading ICU data. Especially, a UDataInfo structure 426f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org * precedes the actual data. It contains platform properties values and the 436f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org * file format version. 446f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org * 456f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org * The following is a description of format version 2. 
 *
 * Data contents:
 *
 * The contents are a parsed, binary form of RFC 3454 and, possibly,
 * NormalizationCorrections.txt, depending on the options specified in the profile.
 *
 * Any Unicode code point from 0 to 0x10ffff can be looked up to get
 * the trie-word, if any, for that code point. This means that the inputs
 * to the lookup are 21-bit unsigned integers, with not all of the
 * 21-bit range used.
 *
 * *.spp files customarily begin with a UDataInfo structure, see udata.h and udata.c.
 * After that there are the following structures:
 *
 * int32_t indexes[_SPREP_INDEX_TOP];           -- _SPREP_INDEX_TOP=16, see enum in sprpimpl.h file
 *
 * UTrie stringPrepTrie;                        -- size in bytes=indexes[_SPREP_INDEX_TRIE_SIZE]
 *
 * uint16_t mappingTable[];                     -- Contains the sequence of code units that the code point maps to
 *                                                 size in bytes = indexes[_SPREP_INDEX_MAPPING_DATA_SIZE]
 *
 * The indexes array contains the following values:
 *  indexes[_SPREP_INDEX_TRIE_SIZE]                  -- The size of the StringPrep trie in bytes
 *  indexes[_SPREP_INDEX_MAPPING_DATA_SIZE]          -- The size of the mappingTable in bytes
 *  indexes[_SPREP_NORM_CORRECTNS_LAST_UNI_VERSION]  -- The index of the Unicode version of the last entry in NormalizationCorrections.txt
 *  indexes[_SPREP_ONE_UCHAR_MAPPING_INDEX_START]    -- The starting index of the 1-UChar mappings in the mapping table
 *  indexes[_SPREP_TWO_UCHARS_MAPPING_INDEX_START]   -- The starting index of the 2-UChar mappings in the mapping table
 *  indexes[_SPREP_THREE_UCHARS_MAPPING_INDEX_START] -- The starting index of the 3-UChar mappings in the mapping table
 *  indexes[_SPREP_FOUR_UCHARS_MAPPING_INDEX_START]  -- The starting index of the 4-UChar mappings in the mapping table
 *  indexes[_SPREP_OPTIONS]                          -- Bit set of options to turn on in the profile, e.g. USPREP_NORMALIZATION_ON, USPREP_CHECK_BIDI_ON
 *
 *
 * StringPrep Trie :
 *
 * The StringPrep trie is a 16-bit trie that contains data for the profile.
 * Each code point is associated with a value (trie-word) in the trie.
 *
 * - structure of data words from the trie
 *
 *  i)  A value greater than or equal to _SPREP_TYPE_THRESHOLD (0xFFF0)
 *      represents the type associated with the code point
 *      if(trieWord >= _SPREP_TYPE_THRESHOLD){
 *          type = trieWord - 0xFFF0;
 *      }
 *      The type can be :
 *             USPREP_UNASSIGNED
 *             USPREP_PROHIBITED
 *             USPREP_DELETE
 *
 *  ii) A value less than _SPREP_TYPE_THRESHOLD means the type is USPREP_MAP and
 *      the trie-word uses the bit distribution described below
 *
 *      0       -  ON : The code point is prohibited (USPREP_PROHIBITED). This is to allow for code points that are both prohibited and mapped.
 *      1       -  ON : The value in the next 14 bits is an index into the mapping table
 *                 OFF: The value in the next 14 bits is a delta value from the code point
 *      2..15   -  Contains data as described by bit 1. If all bits are set
 *                 (value = _SPREP_MAX_INDEX_VALUE) then the type is USPREP_DELETE
 *
 *
 * Mapping Table:
 * The data in the mapping table is sorted according to the length of the mapping sequence.
 * If the type of the code point is USPREP_MAP and the value in the trie word is an index, the index
 * is compared with the start indexes of each sequence length to figure out the length according to
 * the following algorithm:
 *
 *              if(       index >= indexes[_SPREP_ONE_UCHAR_MAPPING_INDEX_START] &&
 *                        index < indexes[_SPREP_TWO_UCHARS_MAPPING_INDEX_START]){
 *                   length = 1;
 *              }else if(index >= indexes[_SPREP_TWO_UCHARS_MAPPING_INDEX_START] &&
 *                       index < indexes[_SPREP_THREE_UCHARS_MAPPING_INDEX_START]){
 *                   length = 2;
 *              }else if(index >= indexes[_SPREP_THREE_UCHARS_MAPPING_INDEX_START] &&
 *                       index < indexes[_SPREP_FOUR_UCHARS_MAPPING_INDEX_START]){
 *                   length = 3;
 *              }else{
 *                   // The first position in the mapping table contains the length
 *                   // of the sequence
 *                   length = mappingTable[index++];
 *
 *              }
*
 */

/* file data ---------------------------------------------------------------- */
/* indexes[] value names */

#if UCONFIG_NO_IDNA

/*
 * dummy UDataInfo cf. udata.h
 *
 * Used when UCONFIG_NO_IDNA is set: the platform properties are filled in
 * as usual, but dataFormat, formatVersion and dataVersion are all zero.
 */
static UDataInfo dataInfo = {
    sizeof(UDataInfo),              /* size of this structure */
    0,                              /* NOTE(review): presumably the reserved word of UDataInfo - confirm in udata.h */

    U_IS_BIG_ENDIAN,                /* platform byte order */
    U_CHARSET_FAMILY,               /* platform charset family */
    U_SIZEOF_UCHAR,                 /* size of a UChar on this platform */
    0,                              /* NOTE(review): presumably the reserved byte of UDataInfo - confirm in udata.h */

    { 0, 0, 0, 0 },                 /* dummy dataFormat */
    { 0, 0, 0, 0 },                 /* dummy formatVersion */
    { 0, 0, 0, 0 }                  /* dummy dataVersion */
};

#else

/*
 * The indexes[] array described in the file-format comment above.
 * All slots start out as 0 and are filled in by the code that follows.
 */
static int32_t indexes[_SPREP_INDEX_TOP]={ 0 };
1526f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org 1536f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.orgstatic uint16_t* mappingData= NULL; 1546f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.orgstatic int32_t mappingDataCapacity = 0; /* we skip the first index in mapping data */ 1556f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.orgstatic int16_t currentIndex = 0; /* the current index into the data trie */ 1566f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.orgstatic int32_t maxLength = 0; /* maximum length of mapping string */ 1576f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org 1586f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org 1596f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org/* UDataInfo cf. udata.h */ 1606f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.orgstatic UDataInfo dataInfo={ 1616f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org sizeof(UDataInfo), 1626f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org 0, 1636f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org 1646f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org U_IS_BIG_ENDIAN, 1656f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org U_CHARSET_FAMILY, 1666f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org U_SIZEOF_UCHAR, 1676f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org 0, 1686f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org 1696f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org { 0x53, 0x50, 0x52, 0x50 }, /* dataFormat="SPRP" */ 1706f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org { 3, 2, UTRIE_SHIFT, UTRIE_INDEX_SHIFT }, /* formatVersion */ 1716f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org { 3, 2, 0, 0 } /* dataVersion (Unicode version) */ 1726f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org}; 1736f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.orgvoid 1746f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.orgsetUnicodeVersion(const char *v) 
{ 1756f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org UVersionInfo version; 1766f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org u_versionFromString(version, v); 1776f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org uprv_memcpy(dataInfo.dataVersion, version, 4); 1786f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org} 1796f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org 1806f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.orgvoid 1816f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.orgsetUnicodeVersionNC(UVersionInfo version){ 1826f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org uint32_t univer = version[0] << 24; 1836f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org univer += version[1] << 16; 1846f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org univer += version[2] << 8; 1856f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org univer += version[3]; 1866f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org indexes[_SPREP_NORM_CORRECTNS_LAST_UNI_VERSION] = univer; 1876f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org} 1886f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.orgstatic UNewTrie *sprepTrie; 1896f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org 1906f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org#define MAX_DATA_LENGTH 11500 1916f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org 1926f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org 1936f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org#define SPREP_DELTA_RANGE_POSITIVE_LIMIT 8191 1946f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org#define SPREP_DELTA_RANGE_NEGATIVE_LIMIT -8192 1956f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org 1966f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org 1976f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.orgextern void 1986f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.orginit() { 
1996f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org 2006f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org sprepTrie = (UNewTrie *)uprv_calloc(1, sizeof(UNewTrie)); 2016f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org 2026f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org /* initialize the two tries */ 2036f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org if(NULL==utrie_open(sprepTrie, NULL, MAX_DATA_LENGTH, 0, 0, FALSE)) { 2046f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org fprintf(stderr, "error: failed to initialize tries\n"); 2056f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org exit(U_MEMORY_ALLOCATION_ERROR); 2066f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org } 2076f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org} 2086f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org 2096f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.orgstatic UHashtable* hashTable = NULL; 2106f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org 2116f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org 2126f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.orgtypedef struct ValueStruct { 2136f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org UChar* mapping; 2146f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org int16_t length; 2156f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org UStringPrepType type; 2166f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org} ValueStruct; 2176f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org 2186f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org/* Callback for deleting the value from the hashtable */ 2196f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.orgstatic void U_CALLCONV valueDeleter(void* obj){ 2206f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org ValueStruct* value = (ValueStruct*) obj; 2216f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org uprv_free(value->mapping); 
2226f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org uprv_free(value); 2236f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org} 2246f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org 2256f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org/* Callback for hashing the entry */ 2266f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.orgstatic int32_t U_CALLCONV hashEntry(const UHashTok parm) { 2276f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org return parm.integer; 2286f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org} 2296f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org 2306f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org/* Callback for comparing two entries */ 2316f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.orgstatic UBool U_CALLCONV compareEntries(const UHashTok p1, const UHashTok p2) { 2326f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org return (UBool)(p1.integer != p2.integer); 2336f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org} 2346f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org 2356f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org 2366f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.orgstatic void 2376f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.orgstoreMappingData(){ 2386f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org 2396f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org int32_t pos = -1; 2406f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org const UHashElement* element = NULL; 2416f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org ValueStruct* value = NULL; 2426f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org int32_t codepoint = 0; 2436f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org int32_t elementCount = 0; 2446f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org int32_t writtenElementCount = 0; 2456f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org int32_t mappingLength = 1; /* minimum mapping 
length */ 2466f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org int32_t oldMappingLength = 0; 2476f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org uint16_t trieWord =0; 2486f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org int32_t limitIndex = 0; 2496f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org 2506f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org if (hashTable == NULL) { 2516f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org return; 2526f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org } 2536f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org elementCount = uhash_count(hashTable); 2546f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org 2556f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org /*initialize the mapping data */ 2566f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org mappingData = (uint16_t*) uprv_calloc(mappingDataCapacity, U_SIZEOF_UCHAR); 2576f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org 2586f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org while(writtenElementCount < elementCount){ 2596f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org 2606f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org while( (element = uhash_nextElement(hashTable, &pos))!=NULL){ 2616f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org 2626f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org codepoint = element->key.integer; 2636f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org value = (ValueStruct*)element->value.pointer; 2646f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org 2656f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org /* store the start of indexes */ 2666f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org if(oldMappingLength != mappingLength){ 2676f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org /* Assume that index[] is used according to the enums defined */ 2686f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org 
if(oldMappingLength <=_SPREP_MAX_INDEX_TOP_LENGTH){ 2696f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org indexes[_SPREP_NORM_CORRECTNS_LAST_UNI_VERSION+mappingLength] = currentIndex; 2706f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org } 2716f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org if(oldMappingLength <= _SPREP_MAX_INDEX_TOP_LENGTH && 2726f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org mappingLength == _SPREP_MAX_INDEX_TOP_LENGTH +1){ 2736f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org 2746f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org limitIndex = currentIndex; 2756f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org 2766f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org } 2776f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org oldMappingLength = mappingLength; 2786f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org } 2796f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org 2806f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org if(value->length == mappingLength){ 2816f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org uint32_t savedTrieWord = 0; 2826f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org trieWord = currentIndex << 2; 2836f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org /* turn on the 2nd bit to signal that the following bits contain an index */ 2846f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org trieWord += 0x02; 2856f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org 2866f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org if(trieWord > _SPREP_TYPE_THRESHOLD){ 2876f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org fprintf(stderr,"trieWord cannot contain value greater than 0x%04X.\n",_SPREP_TYPE_THRESHOLD); 2886f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org exit(U_ILLEGAL_CHAR_FOUND); 2896f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org } 2906f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org /* 
figure out if the code point has type already stored */ 2916f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org savedTrieWord= utrie_get32(sprepTrie,codepoint,NULL); 2926f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org if(savedTrieWord!=0){ 2936f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org if((savedTrieWord- _SPREP_TYPE_THRESHOLD) == USPREP_PROHIBITED){ 2946f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org /* turn on the first bit in trie word */ 2956f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org trieWord += 0x01; 2966f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org }else{ 2976f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org /* 2986f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org * the codepoint has value something other than prohibited 2996f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org * and a mapping .. error! 3006f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org */ 3016f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org fprintf(stderr,"Type for codepoint \\U%08X already set!.\n", (int)codepoint); 3026f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org exit(U_ILLEGAL_ARGUMENT_ERROR); 3036f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org } 3046f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org } 3056f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org 3066f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org /* now set the value in the trie */ 3076f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org if(!utrie_set32(sprepTrie,codepoint,trieWord)){ 3086f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org fprintf(stderr,"Could not set the value for code point.\n"); 3096f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org exit(U_ILLEGAL_ARGUMENT_ERROR); 3106f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org } 3116f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org 3126f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org /* written 
the trie word for the codepoint... increment the count*/ 3136f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org writtenElementCount++; 3146f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org 3156f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org /* sanity check are we exceeding the max number allowed */ 3166f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org if(currentIndex+value->length+1 > _SPREP_MAX_INDEX_VALUE){ 3176f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org fprintf(stderr, "Too many entries in the mapping table %i. Maximum allowed is %i\n", 3186f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org currentIndex+value->length, _SPREP_MAX_INDEX_VALUE); 3196f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org exit(U_INDEX_OUTOFBOUNDS_ERROR); 3206f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org } 3216f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org 3226f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org /* copy the mapping data */ 3236f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org /* write the length */ 3246f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org if(mappingLength > _SPREP_MAX_INDEX_TOP_LENGTH ){ 3256f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org /* the cast here is safe since we donot expect the length to be > 65535 */ 3266f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org mappingData[currentIndex++] = (uint16_t) mappingLength; 3276f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org } 3286f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org /* copy the contents to mappindData array */ 3296f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org uprv_memmove(mappingData+currentIndex, value->mapping, value->length*U_SIZEOF_UCHAR); 3306f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org currentIndex += value->length; 3316f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org if (currentIndex > mappingDataCapacity) { 
3326f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org /* If this happens there is a bug in the computation of the mapping data size in storeMapping() */ 3336f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org fprintf(stderr, "gensprep, fatal error at %s, %d. Aborting.\n", __FILE__, __LINE__); 3346f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org exit(U_INTERNAL_PROGRAM_ERROR); 3356f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org } 3366f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org } 3376f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org } 3386f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org mappingLength++; 3396f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org pos = -1; 3406f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org } 3416f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org /* set the last length for range check */ 3426f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org if(mappingLength <= _SPREP_MAX_INDEX_TOP_LENGTH){ 3436f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org indexes[_SPREP_NORM_CORRECTNS_LAST_UNI_VERSION+mappingLength] = currentIndex+1; 3446f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org }else{ 3456f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org indexes[_SPREP_FOUR_UCHARS_MAPPING_INDEX_START] = limitIndex; 3466f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org } 3476f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org 3486f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org} 3496f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org 3506f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.orgextern void setOptions(int32_t options){ 3516f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org indexes[_SPREP_OPTIONS] = options; 3526f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org} 3536f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.orgextern void 
3546f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.orgstoreMapping(uint32_t codepoint, uint32_t* mapping,int32_t length, 3556f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org UStringPrepType type, UErrorCode* status){ 3566f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org 3576f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org 3586f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org UChar* map = NULL; 3596f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org int16_t adjustedLen=0, i, j; 3606f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org uint16_t trieWord = 0; 3616f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org ValueStruct *value = NULL; 3626f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org uint32_t savedTrieWord = 0; 3636f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org 3646f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org /* initialize the hashtable */ 3656f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org if(hashTable==NULL){ 3666f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org hashTable = uhash_open(hashEntry, compareEntries, NULL, status); 3676f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org uhash_setValueDeleter(hashTable, valueDeleter); 3686f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org } 3696f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org 3706f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org /* figure out if the code point has type already stored */ 3716f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org savedTrieWord= utrie_get32(sprepTrie,codepoint,NULL); 3726f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org if(savedTrieWord!=0){ 3736f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org if((savedTrieWord- _SPREP_TYPE_THRESHOLD) == USPREP_PROHIBITED){ 3746f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org /* turn on the first bit in trie word */ 3756f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org trieWord += 
0x01; 3766f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org }else{ 3776f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org /* 3786f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org * the codepoint has value something other than prohibited 3796f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org * and a mapping .. error! 3806f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org */ 3816f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org fprintf(stderr,"Type for codepoint \\U%08X already set!.\n", (int)codepoint); 3826f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org exit(U_ILLEGAL_ARGUMENT_ERROR); 3836f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org } 3846f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org } 3856f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org 3866f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org /* figure out the real length */ 3876f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org for(i=0; i<length; i++){ 3886f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org adjustedLen += U16_LENGTH(mapping[i]); 3896f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org } 3906f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org 3916f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org if(adjustedLen == 0){ 3926f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org trieWord = (uint16_t)(_SPREP_MAX_INDEX_VALUE << 2); 3936f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org /* make sure that the value of trieWord is less than the threshold */ 3946f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org if(trieWord < _SPREP_TYPE_THRESHOLD){ 3956f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org /* now set the value in the trie */ 3966f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org if(!utrie_set32(sprepTrie,codepoint,trieWord)){ 3976f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org fprintf(stderr,"Could not set the value for code point.\n"); 
3986f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org exit(U_ILLEGAL_ARGUMENT_ERROR); 3996f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org } 4006f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org /* value is set so just return */ 4016f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org return; 4026f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org }else{ 4036f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org fprintf(stderr,"trieWord cannot contain value greater than threshold 0x%04X.\n",_SPREP_TYPE_THRESHOLD); 4046f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org exit(U_ILLEGAL_CHAR_FOUND); 4056f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org } 4066f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org } 4076f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org 4086f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org if(adjustedLen == 1){ 4096f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org /* calculate the delta */ 4106f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org int16_t delta = (int16_t)((int32_t)codepoint - (int16_t) mapping[0]); 4116f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org if(delta >= SPREP_DELTA_RANGE_NEGATIVE_LIMIT && delta <= SPREP_DELTA_RANGE_POSITIVE_LIMIT){ 4126f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org 4136f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org trieWord = delta << 2; 4146f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org 4156f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org 4166f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org /* make sure that the second bit is OFF */ 4176f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org if((trieWord & 0x02) != 0 ){ 4186f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org fprintf(stderr,"The second bit in the trie word is not zero while storing a delta.\n"); 4196f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org exit(U_INTERNAL_PROGRAM_ERROR); 
4206f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org } 4216f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org /* make sure that the value of trieWord is less than the threshold */ 4226f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org if(trieWord < _SPREP_TYPE_THRESHOLD){ 4236f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org /* now set the value in the trie */ 4246f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org if(!utrie_set32(sprepTrie,codepoint,trieWord)){ 4256f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org fprintf(stderr,"Could not set the value for code point.\n"); 4266f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org exit(U_ILLEGAL_ARGUMENT_ERROR); 4276f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org } 4286f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org /* value is set so just return */ 4296f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org return; 4306f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org } 4316f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org } 4326f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org /* 4336f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org * if the delta is not in the given range or if the trieWord is larger than the threshold 4346f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org * just fall through for storing the mapping in the mapping table 4356f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org */ 4366f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org } 4376f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org 4386f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org map = (UChar*) uprv_calloc(adjustedLen + 1, U_SIZEOF_UCHAR); 4396f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org 4406f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org for (i=0, j=0; i<length; i++) { 4416f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org U16_APPEND_UNSAFE(map, j, mapping[i]); 
4426f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org } 4436f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org 4446f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org value = (ValueStruct*) uprv_malloc(sizeof(ValueStruct)); 4456f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org value->mapping = map; 4466f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org value->type = type; 4476f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org value->length = adjustedLen; 4486f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org if(value->length > _SPREP_MAX_INDEX_TOP_LENGTH){ 4496f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org mappingDataCapacity++; 4506f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org } 4516f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org if(maxLength < value->length){ 4526f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org maxLength = value->length; 4536f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org } 4546f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org uhash_iput(hashTable,codepoint,value,status); 4556f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org mappingDataCapacity += adjustedLen; 4566f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org 4576f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org if(U_FAILURE(*status)){ 4586f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org fprintf(stderr, "Failed to put entries into the hastable. 
Error: %s\n", u_errorName(*status)); 4596f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org exit(*status); 4606f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org } 4616f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org} 4626f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org 4636f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org 4646f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.orgextern void 4656f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.orgstoreRange(uint32_t start, uint32_t end, UStringPrepType type,UErrorCode* status){ 4666f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org uint16_t trieWord = 0; 4676f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org 4686f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org if((int)(_SPREP_TYPE_THRESHOLD + type) > 0xFFFF){ 4696f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org fprintf(stderr,"trieWord cannot contain value greater than 0xFFFF.\n"); 4706f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org exit(U_ILLEGAL_CHAR_FOUND); 4716f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org } 4726f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org trieWord = (_SPREP_TYPE_THRESHOLD + type); /* the top 4 bits contain the value */ 4736f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org if(start == end){ 4746f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org uint32_t savedTrieWord = utrie_get32(sprepTrie, start, NULL); 4756f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org if(savedTrieWord>0){ 4766f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org if(savedTrieWord < _SPREP_TYPE_THRESHOLD && type == USPREP_PROHIBITED){ 4776f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org /* 4786f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org * A mapping is stored in the trie word 4796f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org * and the only other possible type that a 
4806f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org * code point can have is USPREP_PROHIBITED 4816f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org * 4826f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org */ 4836f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org 4846f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org /* turn on the 0th bit in the savedTrieWord */ 4856f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org savedTrieWord += 0x01; 4866f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org 4876f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org /* the downcast is safe since we only save 16 bit values */ 4886f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org trieWord = (uint16_t)savedTrieWord; 4896f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org 4906f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org /* make sure that the value of trieWord is less than the threshold */ 4916f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org if(trieWord < _SPREP_TYPE_THRESHOLD){ 4926f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org /* now set the value in the trie */ 4936f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org if(!utrie_set32(sprepTrie,start,trieWord)){ 4946f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org fprintf(stderr,"Could not set the value for code point.\n"); 4956f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org exit(U_ILLEGAL_ARGUMENT_ERROR); 4966f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org } 4976f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org /* value is set so just return */ 4986f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org return; 4996f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org }else{ 5006f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org fprintf(stderr,"trieWord cannot contain value greater than threshold 0x%04X.\n",_SPREP_TYPE_THRESHOLD); 5016f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org 
exit(U_ILLEGAL_CHAR_FOUND); 5026f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org } 5036f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org 5046f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org }else if(savedTrieWord != trieWord){ 5056f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org fprintf(stderr,"Value for codepoint \\U%08X already set!.\n", (int)start); 5066f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org exit(U_ILLEGAL_ARGUMENT_ERROR); 5076f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org } 5086f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org /* if savedTrieWord == trieWord .. fall through and set the value */ 5096f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org } 5106f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org if(!utrie_set32(sprepTrie,start,trieWord)){ 5116f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org fprintf(stderr,"Could not set the value for code point \\U%08X.\n", (int)start); 5126f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org exit(U_ILLEGAL_ARGUMENT_ERROR); 5136f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org } 5146f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org }else{ 5156f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org if(!utrie_setRange32(sprepTrie, start, end+1, trieWord, FALSE)){ 5166f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org fprintf(stderr,"Value for certain codepoint already set.\n"); 5176f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org exit(U_ILLEGAL_CHAR_FOUND); 5186f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org } 5196f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org } 5206f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org 5216f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org} 5226f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org 5236f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org/* folding value: just store the offset (16 bits) if there is any non-0 entry */ 
5246f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.orgstatic uint32_t U_CALLCONV 5256f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.orggetFoldedValue(UNewTrie *trie, UChar32 start, int32_t offset) { 5266f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org uint32_t value; 5276f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org UChar32 limit=0; 5286f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org UBool inBlockZero; 5296f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org 5306f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org limit=start+0x400; 5316f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org while(start<limit) { 5326f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org value=utrie_get32(trie, start, &inBlockZero); 5336f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org if(inBlockZero) { 5346f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org start+=UTRIE_DATA_BLOCK_LENGTH; 5356f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org } else if(value!=0) { 5366f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org return (uint32_t)offset; 5376f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org } else { 5386f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org ++start; 5396f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org } 5406f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org } 5416f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org return 0; 5426f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org 5436f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org} 5446f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org 5456f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org#endif /* #if !UCONFIG_NO_IDNA */ 5466f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org 5476f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.orgextern void 5486f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.orggenerateData(const char *dataDir, const char* bundleName) { 
5496f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org static uint8_t sprepTrieBlock[100000]; 5506f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org 5516f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org UNewDataMemory *pData; 5526f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org UErrorCode errorCode=U_ZERO_ERROR; 5536f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org int32_t size, dataLength; 5546f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org char* fileName = (char*) uprv_malloc(uprv_strlen(bundleName) +100); 5556f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org 5566f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org#if UCONFIG_NO_IDNA 5576f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org 5586f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org size=0; 5596f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org 5606f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org#else 5616f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org 5626f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org int32_t sprepTrieSize; 5636f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org 5646f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org /* sort and add mapping data */ 5656f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org storeMappingData(); 5666f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org 5676f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org sprepTrieSize=utrie_serialize(sprepTrie, sprepTrieBlock, sizeof(sprepTrieBlock), getFoldedValue, TRUE, &errorCode); 5686f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org if(U_FAILURE(errorCode)) { 5696f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org fprintf(stderr, "error: utrie_serialize(sprep trie) failed, %s\n", u_errorName(errorCode)); 5706f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org exit(errorCode); 5716f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org } 
5726f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org 5736f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org size = sprepTrieSize + mappingDataCapacity*U_SIZEOF_UCHAR + sizeof(indexes); 5746f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org if(beVerbose) { 5756f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org printf("size of sprep trie %5u bytes\n", (int)sprepTrieSize); 5766f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org printf("size of " U_ICUDATA_NAME "_%s." DATA_TYPE " contents: %ld bytes\n", bundleName,(long)size); 5776f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org printf("size of mapping data array %5u bytes\n",(int)mappingDataCapacity * U_SIZEOF_UCHAR); 5786f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org printf("Number of code units in mappingData (currentIndex) are: %i \n", currentIndex); 5796f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org printf("Maximum length of the mapping string is : %i \n", (int)maxLength); 5806f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org } 5816f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org 5826f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org#endif 5836f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org 5846f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org fileName[0]=0; 5856f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org uprv_strcat(fileName,bundleName); 5866f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org /* write the data */ 5876f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org pData=udata_create(dataDir, DATA_TYPE, fileName, &dataInfo, 5886f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org haveCopyright ? 
U_COPYRIGHT_STRING : NULL, &errorCode); 5896f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org if(U_FAILURE(errorCode)) { 5906f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org fprintf(stderr, "gensprep: unable to create the output file, error %d\n", errorCode); 5916f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org exit(errorCode); 5926f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org } 5936f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org 5946f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org#if !UCONFIG_NO_IDNA 5956f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org 5966f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org indexes[_SPREP_INDEX_TRIE_SIZE]=sprepTrieSize; 5976f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org indexes[_SPREP_INDEX_MAPPING_DATA_SIZE]=mappingDataCapacity*U_SIZEOF_UCHAR; 5986f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org 5996f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org udata_writeBlock(pData, indexes, sizeof(indexes)); 6006f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org udata_writeBlock(pData, sprepTrieBlock, sprepTrieSize); 6016f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org udata_writeBlock(pData, mappingData, indexes[_SPREP_INDEX_MAPPING_DATA_SIZE]); 6026f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org 6036f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org 6046f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org#endif 6056f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org 6066f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org /* finish up */ 6076f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org dataLength=udata_finish(pData, &errorCode); 6086f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org if(U_FAILURE(errorCode)) { 6096f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org fprintf(stderr, "gensprep: error %d writing the output file\n", errorCode); 
6106f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org exit(errorCode); 6116f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org } 6126f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org 6136f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org if(dataLength!=size) { 6146f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org fprintf(stderr, "gensprep error: data length %ld != calculated size %ld\n", 6156f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org (long)dataLength, (long)size); 6166f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org exit(U_INTERNAL_PROGRAM_ERROR); 6176f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org } 6186f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org 6196f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org#if !UCONFIG_NO_IDNA 6206f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org /* done with writing the data .. close the hashtable */ 6216f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org if (hashTable != NULL) { 6226f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org uhash_close(hashTable); 6236f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org } 6246f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org#endif 6256f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org 6266f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org uprv_free(fileName); 6276f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org} 6286f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org 6296f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org#if !UCONFIG_NO_IDNA 6306f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org 6316f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.orgextern void 6326f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.orgcleanUpData(void) { 6336f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org uprv_free(mappingData); 6346f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org utrie_close(sprepTrie); 
6356f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org uprv_free(sprepTrie); 6366f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org} 6376f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org 6386f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org#endif /* #if !UCONFIG_NO_IDNA */ 6396f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org 6406f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org/* 6416f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org * Hey, Emacs, please set the following: 6426f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org * 6436f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org * Local Variables: 6446f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org * indent-tabs-mode: nil 6456f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org * End: 6466f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org * 6476f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org */ 648