1ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru/* 2ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru******************************************************************************* 3ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru* 4103e9ffba2cba345d0078eb8b8db33249f81840aCraig Cornelius* Copyright (C) 1999-2012, International Business Machines 5ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru* Corporation and others. All Rights Reserved. 6ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru* 7ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru******************************************************************************* 8ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru* file name: store.c 9ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru* encoding: US-ASCII 10ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru* tab size: 8 (not used) 11ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru* indentation:4 12ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru* 13ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru* created on: 2003-02-06 14ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru* created by: Ram Viswanadha 15ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru* 16ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru*/ 17ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 18ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru#include <stdio.h> 19ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru#include <stdlib.h> 20ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru#include "unicode/utypes.h" 21ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru#include "cmemory.h" 22ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru#include "cstring.h" 23ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru#include "filestrm.h" 24ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru#include 
"unicode/udata.h" 25103e9ffba2cba345d0078eb8b8db33249f81840aCraig Cornelius#include "unicode/utf16.h" 26ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru#include "utrie.h" 27ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru#include "unewdata.h" 28ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru#include "gensprep.h" 29ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru#include "uhash.h" 30ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 31ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 32ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru#define DO_DEBUG_OUT 0 33ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 34ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 35ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru/* 36ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru * StringPrep profile file format ------------------------------------ 37ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru * 38ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru * The file format prepared and written here contains a 16-bit trie and a mapping table. 39ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru * 40ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru * Before the data contents described below, there are the headers required by 41ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru * the udata API for loading ICU data. Especially, a UDataInfo structure 42ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru * precedes the actual data. It contains platform properties values and the 43ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru * file format version. 44ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru * 45ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru * The following is a description of format version 2. 
 *
 * Data contents:
 *
 * The content is a parsed, binary form of RFC3454 and possibly
 * NormalizationCorrections.txt depending on the options specified on the profile.
 *
 * Any Unicode code point from 0 to 0x10ffff can be looked up to get
 * the trie-word, if any, for that code point. This means that the inputs
 * to the lookup are 21-bit unsigned integers, with not all of the
 * 21-bit range used.
 *
 * *.spp files customarily begin with a UDataInfo structure, see udata.h and .c.
 * After that there are the following structures:
 *
 * int32_t indexes[_SPREP_INDEX_TOP];           -- _SPREP_INDEX_TOP=16, see enum in sprpimpl.h file
 *
 * UTrie stringPrepTrie;                        -- size in bytes=indexes[_SPREP_INDEX_TRIE_SIZE]
 *
 * uint16_t mappingTable[];                     -- Contains the sequence of code units that the code point maps to
 *                                                 size in bytes = indexes[_SPREP_INDEX_MAPPING_DATA_SIZE]
 *
 * The indexes array contains the following values:
 *  indexes[_SPREP_INDEX_TRIE_SIZE]                  -- The size of the StringPrep trie in bytes
 *  indexes[_SPREP_INDEX_MAPPING_DATA_SIZE]          -- The size of the mappingTable in bytes
 *  indexes[_SPREP_NORM_CORRECTNS_LAST_UNI_VERSION]  -- The Unicode version of the last entry in NormalizationCorrections.txt,
 *                                                      with its four version fields packed one per byte, most significant first
 *  indexes[_SPREP_ONE_UCHAR_MAPPING_INDEX_START]    -- The starting index of 1 UChar mapping index in the mapping table
 *  indexes[_SPREP_TWO_UCHARS_MAPPING_INDEX_START]   -- The starting index of 2 UChars mapping index in the mapping table
 *  indexes[_SPREP_THREE_UCHARS_MAPPING_INDEX_START] -- The starting index of 3 UChars mapping index in the mapping table
 *  indexes[_SPREP_FOUR_UCHARS_MAPPING_INDEX_START]  -- The starting index of 4 UChars mapping index in the mapping table
 *  indexes[_SPREP_OPTIONS]                          -- Bit set of options to turn on in the profile, e.g: USPREP_NORMALIZATION_ON, USPREP_CHECK_BIDI_ON
 *
 *
 * StringPrep Trie :
 *
 * The StringPrep trie is a 16-bit trie that contains data for the profile.
 * Each code point is associated with a value (trie-word) in the trie.
 *
 * - structure of data words from the trie
 *
 *  i)  A value greater than or equal to _SPREP_TYPE_THRESHOLD (0xFFF0)
 *      represents the type associated with the code point
 *      if(trieWord >= _SPREP_TYPE_THRESHOLD){
 *          type = trieWord - 0xFFF0;
 *      }
 *      The type can be :
 *          USPREP_UNASSIGNED
 *          USPREP_PROHIBITED
 *          USPREP_DELETE
 *
 *  ii) A value less than _SPREP_TYPE_THRESHOLD means the type is USPREP_MAP and
 *      contains distribution described below
 *
 *      0       -  ON : The code point is prohibited (USPREP_PROHIBITED). This is to allow for code points that are both prohibited and mapped.
 *      1       -  ON : The value in the next 14 bits is an index into the mapping table
 *                 OFF: The value in the next 14 bits is a delta value from the code point
 *      2..15   -  Contains data as described by bit 1. If all bits are set
 *                 (value = _SPREP_MAX_INDEX_VALUE) then the type is USPREP_DELETE
 *
 *
 * Mapping Table:
 * The data in mapping table is sorted according to the length of the mapping sequence.
 * If the type of the code point is USPREP_MAP and the value in the trie word is an index, the index
 * is compared with the start indexes stored for each sequence length to figure out the length according to
 * the following algorithm:
 *
 *              if(       index >= indexes[_SPREP_ONE_UCHAR_MAPPING_INDEX_START] &&
 *                        index < indexes[_SPREP_TWO_UCHARS_MAPPING_INDEX_START]){
 *                   length = 1;
 *               }else if(index >= indexes[_SPREP_TWO_UCHARS_MAPPING_INDEX_START] &&
 *                        index < indexes[_SPREP_THREE_UCHARS_MAPPING_INDEX_START]){
 *                   length = 2;
 *               }else if(index >= indexes[_SPREP_THREE_UCHARS_MAPPING_INDEX_START] &&
 *                        index < indexes[_SPREP_FOUR_UCHARS_MAPPING_INDEX_START]){
 *                   length = 3;
 *               }else{
 *                   // The first position in the mapping table contains the length
 *                   // of the sequence
 *                   length = mappingTable[index++];
 *
 *               }
126ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru * 127ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru */ 128ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 129ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru/* file data ---------------------------------------------------------------- */ 130ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru/* indexes[] value names */ 131ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 132ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru#if UCONFIG_NO_IDNA 133ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 134ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru/* dummy UDataInfo cf. udata.h */ 135ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Querustatic UDataInfo dataInfo = { 136ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru sizeof(UDataInfo), 137ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 0, 138ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 139ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru U_IS_BIG_ENDIAN, 140ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru U_CHARSET_FAMILY, 141ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru U_SIZEOF_UCHAR, 142ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 0, 143ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 144ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru { 0, 0, 0, 0 }, /* dummy dataFormat */ 145ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru { 0, 0, 0, 0 }, /* dummy formatVersion */ 146ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru { 0, 0, 0, 0 } /* dummy dataVersion */ 147ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru}; 148ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 149ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru#else 150ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 
151ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Querustatic int32_t indexes[_SPREP_INDEX_TOP]={ 0 }; 152ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 153ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Querustatic uint16_t* mappingData= NULL; 154ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Querustatic int32_t mappingDataCapacity = 0; /* we skip the first index in mapping data */ 155ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Querustatic int16_t currentIndex = 0; /* the current index into the data trie */ 156ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Querustatic int32_t maxLength = 0; /* maximum length of mapping string */ 157ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 158ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 159ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru/* UDataInfo cf. udata.h */ 160ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Querustatic UDataInfo dataInfo={ 161ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru sizeof(UDataInfo), 162ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 0, 163ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 164ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru U_IS_BIG_ENDIAN, 165ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru U_CHARSET_FAMILY, 166ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru U_SIZEOF_UCHAR, 167ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 0, 168ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 169ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru { 0x53, 0x50, 0x52, 0x50 }, /* dataFormat="SPRP" */ 170ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru { 3, 2, UTRIE_SHIFT, UTRIE_INDEX_SHIFT }, /* formatVersion */ 171ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru { 3, 2, 0, 0 } /* dataVersion (Unicode version) */ 172ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru}; 
173ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queruvoid 174ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste QuerusetUnicodeVersion(const char *v) { 175ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru UVersionInfo version; 176ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru u_versionFromString(version, v); 177ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru uprv_memcpy(dataInfo.dataVersion, version, 4); 178ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru} 179ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 180ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queruvoid 181ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste QuerusetUnicodeVersionNC(UVersionInfo version){ 182ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru uint32_t univer = version[0] << 24; 183ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru univer += version[1] << 16; 184ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru univer += version[2] << 8; 185ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru univer += version[3]; 186ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru indexes[_SPREP_NORM_CORRECTNS_LAST_UNI_VERSION] = univer; 187ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru} 188ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Querustatic UNewTrie *sprepTrie; 189ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 190ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru#define MAX_DATA_LENGTH 11500 191ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 192ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 193ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru#define SPREP_DELTA_RANGE_POSITIVE_LIMIT 8191 194ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru#define SPREP_DELTA_RANGE_NEGATIVE_LIMIT -8192 195ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 196ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 
197ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queruextern void 198ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queruinit() { 199ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 200103e9ffba2cba345d0078eb8b8db33249f81840aCraig Cornelius sprepTrie = (UNewTrie *)uprv_calloc(1, sizeof(UNewTrie)); 201ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 202ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru /* initialize the two tries */ 203ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru if(NULL==utrie_open(sprepTrie, NULL, MAX_DATA_LENGTH, 0, 0, FALSE)) { 204ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru fprintf(stderr, "error: failed to initialize tries\n"); 205ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru exit(U_MEMORY_ALLOCATION_ERROR); 206ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru } 207ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru} 208ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 209ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Querustatic UHashtable* hashTable = NULL; 210ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 211ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 212ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Querutypedef struct ValueStruct { 213ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru UChar* mapping; 214ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru int16_t length; 215ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru UStringPrepType type; 216ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru} ValueStruct; 217ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 218ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru/* Callback for deleting the value from the hashtable */ 219ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Querustatic void U_CALLCONV valueDeleter(void* obj){ 220ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 
ValueStruct* value = (ValueStruct*) obj; 221ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru uprv_free(value->mapping); 222ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru uprv_free(value); 223ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru} 224ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 225ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru/* Callback for hashing the entry */ 226ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Querustatic int32_t U_CALLCONV hashEntry(const UHashTok parm) { 227ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru return parm.integer; 228ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru} 229ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 230ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru/* Callback for comparing two entries */ 231ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Querustatic UBool U_CALLCONV compareEntries(const UHashTok p1, const UHashTok p2) { 232ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru return (UBool)(p1.integer != p2.integer); 233ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru} 234ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 235ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 236ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Querustatic void 237ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste QuerustoreMappingData(){ 238ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 239ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru int32_t pos = -1; 240ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru const UHashElement* element = NULL; 241ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru ValueStruct* value = NULL; 242ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru int32_t codepoint = 0; 24385bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho int32_t elementCount = 0; 244ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste 
Queru int32_t writtenElementCount = 0; 245ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru int32_t mappingLength = 1; /* minimum mapping length */ 246ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru int32_t oldMappingLength = 0; 247ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru uint16_t trieWord =0; 248ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru int32_t limitIndex = 0; 249ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 25085bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho if (hashTable == NULL) { 25185bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho return; 25285bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho } 25385bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho elementCount = uhash_count(hashTable); 25485bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho 25585bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho /*initialize the mapping data */ 256103e9ffba2cba345d0078eb8b8db33249f81840aCraig Cornelius mappingData = (uint16_t*) uprv_calloc(mappingDataCapacity, U_SIZEOF_UCHAR); 257ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 258ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru while(writtenElementCount < elementCount){ 259ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 260ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru while( (element = uhash_nextElement(hashTable, &pos))!=NULL){ 261ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 262ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru codepoint = element->key.integer; 263ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru value = (ValueStruct*)element->value.pointer; 264ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 265ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru /* store the start of indexes */ 266ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru if(oldMappingLength != mappingLength){ 267ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru /* Assume that 
index[] is used according to the enums defined */ 268ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru if(oldMappingLength <=_SPREP_MAX_INDEX_TOP_LENGTH){ 269ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru indexes[_SPREP_NORM_CORRECTNS_LAST_UNI_VERSION+mappingLength] = currentIndex; 270ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru } 271ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru if(oldMappingLength <= _SPREP_MAX_INDEX_TOP_LENGTH && 272ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru mappingLength == _SPREP_MAX_INDEX_TOP_LENGTH +1){ 273ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 274ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru limitIndex = currentIndex; 275ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 276ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru } 277ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru oldMappingLength = mappingLength; 278ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru } 279ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 280ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru if(value->length == mappingLength){ 281ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru uint32_t savedTrieWord = 0; 282ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru trieWord = currentIndex << 2; 283ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru /* turn on the 2nd bit to signal that the following bits contain an index */ 284ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru trieWord += 0x02; 285ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 286ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru if(trieWord > _SPREP_TYPE_THRESHOLD){ 287ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru fprintf(stderr,"trieWord cannot contain value greater than 0x%04X.\n",_SPREP_TYPE_THRESHOLD); 288ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 
exit(U_ILLEGAL_CHAR_FOUND); 289ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru } 290ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru /* figure out if the code point has type already stored */ 291ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru savedTrieWord= utrie_get32(sprepTrie,codepoint,NULL); 292ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru if(savedTrieWord!=0){ 293ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru if((savedTrieWord- _SPREP_TYPE_THRESHOLD) == USPREP_PROHIBITED){ 294ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru /* turn on the first bit in trie word */ 295ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru trieWord += 0x01; 296ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru }else{ 297ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru /* 298ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru * the codepoint has value something other than prohibited 299ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru * and a mapping .. error! 
300ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru */ 301ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru fprintf(stderr,"Type for codepoint \\U%08X already set!.\n", (int)codepoint); 302ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru exit(U_ILLEGAL_ARGUMENT_ERROR); 303ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru } 304ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru } 305ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 306ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru /* now set the value in the trie */ 307ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru if(!utrie_set32(sprepTrie,codepoint,trieWord)){ 308ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru fprintf(stderr,"Could not set the value for code point.\n"); 309ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru exit(U_ILLEGAL_ARGUMENT_ERROR); 310ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru } 311ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 312ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru /* written the trie word for the codepoint... increment the count*/ 313ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru writtenElementCount++; 314ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 315ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru /* sanity check are we exceeding the max number allowed */ 316ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru if(currentIndex+value->length+1 > _SPREP_MAX_INDEX_VALUE){ 31754dcd9b6a06071f647dac967e9e267abb9410720Craig Cornelius fprintf(stderr, "Too many entries in the mapping table %i. 
Maximum allowed is %i\n", 31854dcd9b6a06071f647dac967e9e267abb9410720Craig Cornelius currentIndex+value->length, _SPREP_MAX_INDEX_VALUE); 319ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru exit(U_INDEX_OUTOFBOUNDS_ERROR); 320ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru } 321ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 322ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru /* copy the mapping data */ 32354dcd9b6a06071f647dac967e9e267abb9410720Craig Cornelius /* write the length */ 32454dcd9b6a06071f647dac967e9e267abb9410720Craig Cornelius if(mappingLength > _SPREP_MAX_INDEX_TOP_LENGTH ){ 32554dcd9b6a06071f647dac967e9e267abb9410720Craig Cornelius /* the cast here is safe since we donot expect the length to be > 65535 */ 32654dcd9b6a06071f647dac967e9e267abb9410720Craig Cornelius mappingData[currentIndex++] = (uint16_t) mappingLength; 32754dcd9b6a06071f647dac967e9e267abb9410720Craig Cornelius } 32854dcd9b6a06071f647dac967e9e267abb9410720Craig Cornelius /* copy the contents to mappindData array */ 32954dcd9b6a06071f647dac967e9e267abb9410720Craig Cornelius uprv_memmove(mappingData+currentIndex, value->mapping, value->length*U_SIZEOF_UCHAR); 33054dcd9b6a06071f647dac967e9e267abb9410720Craig Cornelius currentIndex += value->length; 33154dcd9b6a06071f647dac967e9e267abb9410720Craig Cornelius if (currentIndex > mappingDataCapacity) { 33254dcd9b6a06071f647dac967e9e267abb9410720Craig Cornelius /* If this happens there is a bug in the computation of the mapping data size in storeMapping() */ 33354dcd9b6a06071f647dac967e9e267abb9410720Craig Cornelius fprintf(stderr, "gensprep, fatal error at %s, %d. 
Aborting.\n", __FILE__, __LINE__); 33454dcd9b6a06071f647dac967e9e267abb9410720Craig Cornelius exit(U_INTERNAL_PROGRAM_ERROR); 335ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru } 336ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru } 337ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru } 338ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru mappingLength++; 339ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru pos = -1; 340ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru } 341ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru /* set the last length for range check */ 342ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru if(mappingLength <= _SPREP_MAX_INDEX_TOP_LENGTH){ 343ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru indexes[_SPREP_NORM_CORRECTNS_LAST_UNI_VERSION+mappingLength] = currentIndex+1; 344ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru }else{ 345ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru indexes[_SPREP_FOUR_UCHARS_MAPPING_INDEX_START] = limitIndex; 346ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru } 347ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 348ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru} 349ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 350ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queruextern void setOptions(int32_t options){ 351ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru indexes[_SPREP_OPTIONS] = options; 352ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru} 353ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queruextern void 354ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste QuerustoreMapping(uint32_t codepoint, uint32_t* mapping,int32_t length, 355ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru UStringPrepType type, UErrorCode* status){ 356ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 
357ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 358ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru UChar* map = NULL; 35954dcd9b6a06071f647dac967e9e267abb9410720Craig Cornelius int16_t adjustedLen=0, i, j; 360ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru uint16_t trieWord = 0; 361ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru ValueStruct *value = NULL; 362ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru uint32_t savedTrieWord = 0; 363ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 364ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru /* initialize the hashtable */ 365ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru if(hashTable==NULL){ 366ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru hashTable = uhash_open(hashEntry, compareEntries, NULL, status); 367ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru uhash_setValueDeleter(hashTable, valueDeleter); 368ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru } 369ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 370ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru /* figure out if the code point has type already stored */ 371ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru savedTrieWord= utrie_get32(sprepTrie,codepoint,NULL); 372ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru if(savedTrieWord!=0){ 373ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru if((savedTrieWord- _SPREP_TYPE_THRESHOLD) == USPREP_PROHIBITED){ 374ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru /* turn on the first bit in trie word */ 375ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru trieWord += 0x01; 376ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru }else{ 377ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru /* 378ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru * the codepoint has value something other than prohibited 
379ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru * and a mapping .. error! 380ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru */ 381ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru fprintf(stderr,"Type for codepoint \\U%08X already set!.\n", (int)codepoint); 382ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru exit(U_ILLEGAL_ARGUMENT_ERROR); 383ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru } 384ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru } 385ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 386ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru /* figure out the real length */ 387ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru for(i=0; i<length; i++){ 38854dcd9b6a06071f647dac967e9e267abb9410720Craig Cornelius adjustedLen += U16_LENGTH(mapping[i]); 389ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru } 390ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 391ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru if(adjustedLen == 0){ 392ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru trieWord = (uint16_t)(_SPREP_MAX_INDEX_VALUE << 2); 393ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru /* make sure that the value of trieWord is less than the threshold */ 394ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru if(trieWord < _SPREP_TYPE_THRESHOLD){ 395ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru /* now set the value in the trie */ 396ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru if(!utrie_set32(sprepTrie,codepoint,trieWord)){ 397ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru fprintf(stderr,"Could not set the value for code point.\n"); 398ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru exit(U_ILLEGAL_ARGUMENT_ERROR); 399ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru } 400ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru /* value is set so just return */ 
401ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru return; 402ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru }else{ 403ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru fprintf(stderr,"trieWord cannot contain value greater than threshold 0x%04X.\n",_SPREP_TYPE_THRESHOLD); 404ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru exit(U_ILLEGAL_CHAR_FOUND); 405ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru } 406ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru } 407ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 408ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru if(adjustedLen == 1){ 409ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru /* calculate the delta */ 410ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru int16_t delta = (int16_t)((int32_t)codepoint - (int16_t) mapping[0]); 411ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru if(delta >= SPREP_DELTA_RANGE_NEGATIVE_LIMIT && delta <= SPREP_DELTA_RANGE_POSITIVE_LIMIT){ 412ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 413ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru trieWord = delta << 2; 414ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 415ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 416ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru /* make sure that the second bit is OFF */ 417ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru if((trieWord & 0x02) != 0 ){ 418ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru fprintf(stderr,"The second bit in the trie word is not zero while storing a delta.\n"); 419ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru exit(U_INTERNAL_PROGRAM_ERROR); 420ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru } 421ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru /* make sure that the value of trieWord is less than the threshold */ 
422ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru if(trieWord < _SPREP_TYPE_THRESHOLD){ 423ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru /* now set the value in the trie */ 424ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru if(!utrie_set32(sprepTrie,codepoint,trieWord)){ 425ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru fprintf(stderr,"Could not set the value for code point.\n"); 426ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru exit(U_ILLEGAL_ARGUMENT_ERROR); 427ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru } 428ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru /* value is set so just return */ 429ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru return; 430ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru } 431ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru } 432ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru /* 433ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru * if the delta is not in the given range or if the trieWord is larger than the threshold 434ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru * just fall through for storing the mapping in the mapping table 435ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru */ 436ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru } 437ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 438103e9ffba2cba345d0078eb8b8db33249f81840aCraig Cornelius map = (UChar*) uprv_calloc(adjustedLen + 1, U_SIZEOF_UCHAR); 439ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 44054dcd9b6a06071f647dac967e9e267abb9410720Craig Cornelius for (i=0, j=0; i<length; i++) { 44154dcd9b6a06071f647dac967e9e267abb9410720Craig Cornelius U16_APPEND_UNSAFE(map, j, mapping[i]); 442ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru } 443ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 444ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru value = (ValueStruct*) 
uprv_malloc(sizeof(ValueStruct)); 445ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru value->mapping = map; 44654dcd9b6a06071f647dac967e9e267abb9410720Craig Cornelius value->type = type; 447ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru value->length = adjustedLen; 448ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru if(value->length > _SPREP_MAX_INDEX_TOP_LENGTH){ 449ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru mappingDataCapacity++; 450ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru } 451ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru if(maxLength < value->length){ 452ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru maxLength = value->length; 453ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru } 454ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru uhash_iput(hashTable,codepoint,value,status); 455ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru mappingDataCapacity += adjustedLen; 456ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 457ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru if(U_FAILURE(*status)){ 458ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru fprintf(stderr, "Failed to put entries into the hastable. 
Error: %s\n", u_errorName(*status)); 459ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru exit(*status); 460ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru } 461ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru} 462ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 463ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 464ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queruextern void 465ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste QuerustoreRange(uint32_t start, uint32_t end, UStringPrepType type,UErrorCode* status){ 466ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru uint16_t trieWord = 0; 467ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 468ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru if((int)(_SPREP_TYPE_THRESHOLD + type) > 0xFFFF){ 469ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru fprintf(stderr,"trieWord cannot contain value greater than 0xFFFF.\n"); 470ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru exit(U_ILLEGAL_CHAR_FOUND); 471ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru } 472ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru trieWord = (_SPREP_TYPE_THRESHOLD + type); /* the top 4 bits contain the value */ 473ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru if(start == end){ 474ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru uint32_t savedTrieWord = utrie_get32(sprepTrie, start, NULL); 475ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru if(savedTrieWord>0){ 476ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru if(savedTrieWord < _SPREP_TYPE_THRESHOLD && type == USPREP_PROHIBITED){ 477ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru /* 478ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru * A mapping is stored in the trie word 479ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru * and the only other possible type that a 
480ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru * code point can have is USPREP_PROHIBITED 481ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru * 482ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru */ 483ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 484ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru /* turn on the 0th bit in the savedTrieWord */ 485ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru savedTrieWord += 0x01; 486ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 487ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru /* the downcast is safe since we only save 16 bit values */ 488ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru trieWord = (uint16_t)savedTrieWord; 489ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 490ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru /* make sure that the value of trieWord is less than the threshold */ 491ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru if(trieWord < _SPREP_TYPE_THRESHOLD){ 492ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru /* now set the value in the trie */ 493ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru if(!utrie_set32(sprepTrie,start,trieWord)){ 494ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru fprintf(stderr,"Could not set the value for code point.\n"); 495ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru exit(U_ILLEGAL_ARGUMENT_ERROR); 496ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru } 497ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru /* value is set so just return */ 498ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru return; 499ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru }else{ 500ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru fprintf(stderr,"trieWord cannot contain value greater than threshold 0x%04X.\n",_SPREP_TYPE_THRESHOLD); 
501ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru exit(U_ILLEGAL_CHAR_FOUND); 502ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru } 503ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 504ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru }else if(savedTrieWord != trieWord){ 505ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru fprintf(stderr,"Value for codepoint \\U%08X already set!.\n", (int)start); 506ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru exit(U_ILLEGAL_ARGUMENT_ERROR); 507ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru } 508ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru /* if savedTrieWord == trieWord .. fall through and set the value */ 509ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru } 510ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru if(!utrie_set32(sprepTrie,start,trieWord)){ 511ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru fprintf(stderr,"Could not set the value for code point \\U%08X.\n", (int)start); 512ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru exit(U_ILLEGAL_ARGUMENT_ERROR); 513ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru } 514ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru }else{ 515ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru if(!utrie_setRange32(sprepTrie, start, end+1, trieWord, FALSE)){ 516ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru fprintf(stderr,"Value for certain codepoint already set.\n"); 517ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru exit(U_ILLEGAL_CHAR_FOUND); 518ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru } 519ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru } 520ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 521ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru} 522ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 523ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste 
Queru/* folding value: just store the offset (16 bits) if there is any non-0 entry */ 524ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Querustatic uint32_t U_CALLCONV 525ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste QuerugetFoldedValue(UNewTrie *trie, UChar32 start, int32_t offset) { 526103e9ffba2cba345d0078eb8b8db33249f81840aCraig Cornelius uint32_t value; 527ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru UChar32 limit=0; 528ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru UBool inBlockZero; 529ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 530ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru limit=start+0x400; 531ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru while(start<limit) { 532ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru value=utrie_get32(trie, start, &inBlockZero); 533ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru if(inBlockZero) { 534ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru start+=UTRIE_DATA_BLOCK_LENGTH; 535ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru } else if(value!=0) { 536ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru return (uint32_t)offset; 537ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru } else { 538ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru ++start; 539ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru } 540ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru } 541ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru return 0; 542ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 543ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru} 544ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 545ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru#endif /* #if !UCONFIG_NO_IDNA */ 546ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 547ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queruextern void 
548ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste QuerugenerateData(const char *dataDir, const char* bundleName) { 549ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru static uint8_t sprepTrieBlock[100000]; 550ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 551ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru UNewDataMemory *pData; 552ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru UErrorCode errorCode=U_ZERO_ERROR; 553ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru int32_t size, dataLength; 554ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru char* fileName = (char*) uprv_malloc(uprv_strlen(bundleName) +100); 555ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 556ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru#if UCONFIG_NO_IDNA 557ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 558ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru size=0; 559ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 560ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru#else 561ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 562ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru int32_t sprepTrieSize; 563ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 564ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru /* sort and add mapping data */ 565ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru storeMappingData(); 566ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 567ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru sprepTrieSize=utrie_serialize(sprepTrie, sprepTrieBlock, sizeof(sprepTrieBlock), getFoldedValue, TRUE, &errorCode); 568ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru if(U_FAILURE(errorCode)) { 569ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru fprintf(stderr, "error: utrie_serialize(sprep trie) failed, %s\n", u_errorName(errorCode)); 
570ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru exit(errorCode); 571ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru } 572ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 573ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru size = sprepTrieSize + mappingDataCapacity*U_SIZEOF_UCHAR + sizeof(indexes); 574ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru if(beVerbose) { 575ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru printf("size of sprep trie %5u bytes\n", (int)sprepTrieSize); 576ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru printf("size of " U_ICUDATA_NAME "_%s." DATA_TYPE " contents: %ld bytes\n", bundleName,(long)size); 577ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru printf("size of mapping data array %5u bytes\n",(int)mappingDataCapacity * U_SIZEOF_UCHAR); 578ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru printf("Number of code units in mappingData (currentIndex) are: %i \n", currentIndex); 579ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru printf("Maximum length of the mapping string is : %i \n", (int)maxLength); 580ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru } 581ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 582ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru#endif 583ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 584ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru fileName[0]=0; 585ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru uprv_strcat(fileName,bundleName); 586ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru /* write the data */ 587ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru pData=udata_create(dataDir, DATA_TYPE, fileName, &dataInfo, 588ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru haveCopyright ? 
U_COPYRIGHT_STRING : NULL, &errorCode); 589ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru if(U_FAILURE(errorCode)) { 590ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru fprintf(stderr, "gensprep: unable to create the output file, error %d\n", errorCode); 591ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru exit(errorCode); 592ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru } 593ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 594ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru#if !UCONFIG_NO_IDNA 595ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 596ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru indexes[_SPREP_INDEX_TRIE_SIZE]=sprepTrieSize; 597ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru indexes[_SPREP_INDEX_MAPPING_DATA_SIZE]=mappingDataCapacity*U_SIZEOF_UCHAR; 598ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 599ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru udata_writeBlock(pData, indexes, sizeof(indexes)); 600ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru udata_writeBlock(pData, sprepTrieBlock, sprepTrieSize); 601ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru udata_writeBlock(pData, mappingData, indexes[_SPREP_INDEX_MAPPING_DATA_SIZE]); 602ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 603ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 604ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru#endif 605ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 606ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru /* finish up */ 607ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru dataLength=udata_finish(pData, &errorCode); 608ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru if(U_FAILURE(errorCode)) { 609ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru fprintf(stderr, "gensprep: error %d writing the output file\n", errorCode); 
610ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru exit(errorCode); 611ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru } 612ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 613ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru if(dataLength!=size) { 614ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru fprintf(stderr, "gensprep error: data length %ld != calculated size %ld\n", 615ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru (long)dataLength, (long)size); 616ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru exit(U_INTERNAL_PROGRAM_ERROR); 617ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru } 618ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 619ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru#if !UCONFIG_NO_IDNA 620ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru /* done with writing the data .. close the hashtable */ 62185bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho if (hashTable != NULL) { 62285bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho uhash_close(hashTable); 62385bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho } 624ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru#endif 62554dcd9b6a06071f647dac967e9e267abb9410720Craig Cornelius 62654dcd9b6a06071f647dac967e9e267abb9410720Craig Cornelius uprv_free(fileName); 627ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru} 628ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 629ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru#if !UCONFIG_NO_IDNA 630ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 631ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queruextern void 632ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste QuerucleanUpData(void) { 63354dcd9b6a06071f647dac967e9e267abb9410720Craig Cornelius uprv_free(mappingData); 634ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru utrie_close(sprepTrie); 
635ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru uprv_free(sprepTrie); 636ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru} 637ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 638ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru#endif /* #if !UCONFIG_NO_IDNA */ 639ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 640ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru/* 641ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru * Hey, Emacs, please set the following: 642ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru * 643ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru * Local Variables: 644ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru * indent-tabs-mode: nil 645ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru * End: 646ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru * 647ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru */ 648