1ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru/*
2ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru*******************************************************************************
3ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru*
4103e9ffba2cba345d0078eb8b8db33249f81840aCraig Cornelius*   Copyright (C) 1999-2012, International Business Machines
5ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru*   Corporation and others.  All Rights Reserved.
6ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru*
7ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru*******************************************************************************
8ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru*   file name:  store.c
9ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru*   encoding:   US-ASCII
10ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru*   tab size:   8 (not used)
11ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru*   indentation:4
12ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru*
13ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru*   created on: 2003-02-06
14ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru*   created by: Ram Viswanadha
15ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru*
16ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru*/
17ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru
18ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru#include <stdio.h>
19ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru#include <stdlib.h>
20ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru#include "unicode/utypes.h"
21ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru#include "cmemory.h"
22ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru#include "cstring.h"
23ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru#include "filestrm.h"
24ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru#include "unicode/udata.h"
25103e9ffba2cba345d0078eb8b8db33249f81840aCraig Cornelius#include "unicode/utf16.h"
26ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru#include "utrie.h"
27ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru#include "unewdata.h"
28ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru#include "gensprep.h"
29ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru#include "uhash.h"
30ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru
31ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru
32ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru#define DO_DEBUG_OUT 0
33ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru
34ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru
35ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru/*
36ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru * StringPrep profile file format ------------------------------------
37ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru *
38ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru * The file format prepared and written here contains a 16-bit trie and a mapping table.
39ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru *
40ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru * Before the data contents described below, there are the headers required by
41ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru * the udata API for loading ICU data. Especially, a UDataInfo structure
42ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru * precedes the actual data. It contains platform properties values and the
43ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru * file format version.
44ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru *
45ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru * The following is a description of format version 2.
46ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru *
47ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru * Data contents:
48ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru *
49ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru * The contents is a parsed, binary form of RFC3454 and possibly
50ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru * NormalizationCorrections.txt depending on the options specified on the profile.
51ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru *
52ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru * Any Unicode code point from 0 to 0x10ffff can be looked up to get
53ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru * the trie-word, if any, for that code point. This means that the input
54ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru * to the lookup are 21-bit unsigned integers, with not all of the
55ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru * 21-bit range used.
56ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru *
57ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru * *.spp files customarily begin with a UDataInfo structure, see udata.h and .c.
58ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru * After that there are the following structures:
59ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru *
60ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru * int32_t indexes[_SPREP_INDEX_TOP];           -- _SPREP_INDEX_TOP=16, see enum in sprpimpl.h file
61ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru *
62ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru * UTrie stringPrepTrie;                        -- size in bytes=indexes[_SPREP_INDEX_TRIE_SIZE]
63ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru *
 * uint16_t mappingTable[];                     -- Contains the sequence of code units that the code point maps to
65ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru *                                                 size in bytes = indexes[_SPREP_INDEX_MAPPING_DATA_SIZE]
66ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru *
67ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru * The indexes array contains the following values:
68ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru *  indexes[_SPREP_INDEX_TRIE_SIZE]                  -- The size of the StringPrep trie in bytes
69ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru *  indexes[_SPREP_INDEX_MAPPING_DATA_SIZE]          -- The size of the mappingTable in bytes
70ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru *  indexes[_SPREP_NORM_CORRECTNS_LAST_UNI_VERSION]  -- The index of Unicode version of last entry in NormalizationCorrections.txt
71ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru *  indexes[_SPREP_ONE_UCHAR_MAPPING_INDEX_START]    -- The starting index of 1 UChar  mapping index in the mapping table
72ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru *  indexes[_SPREP_TWO_UCHARS_MAPPING_INDEX_START]   -- The starting index of 2 UChars mapping index in the mapping table
73ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru *  indexes[_SPREP_THREE_UCHARS_MAPPING_INDEX_START] -- The starting index of 3 UChars mapping index in the mapping table
74ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru *  indexes[_SPREP_FOUR_UCHARS_MAPPING_INDEX_START]  -- The starting index of 4 UChars mapping index in the mapping table
75ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru *  indexes[_SPREP_OPTIONS]                          -- Bit set of options to turn on in the profile, e.g: USPREP_NORMALIZATION_ON, USPREP_CHECK_BIDI_ON
76ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru *
77ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru *
78ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru * StringPrep Trie :
79ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru *
 * The StringPrep trie is a 16-bit trie that contains data for the profile.
81ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru * Each code point is associated with a value (trie-word) in the trie.
82ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru *
83ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru * - structure of data words from the trie
84ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru *
85ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru *  i)  A value greater than or equal to _SPREP_TYPE_THRESHOLD (0xFFF0)
86ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru *      represents the type associated with the code point
87ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru *      if(trieWord >= _SPREP_TYPE_THRESHOLD){
88ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru *          type = trieWord - 0xFFF0;
89ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru *      }
90ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru *      The type can be :
91ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru *             USPREP_UNASSIGNED
92ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru *             USPREP_PROHIBITED
93ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru *             USPREP_DELETE
94ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru *
95ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru *  ii) A value less than _SPREP_TYPE_THRESHOLD means the type is USPREP_MAP and
96ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru *      contains distribution described below
97ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru *
98ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru *      0       -  ON : The code point is prohibited (USPREP_PROHIBITED). This is to allow for codepoint that are both prohibited and mapped.
99ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru *      1       -  ON : The value in the next 14 bits is an index into the mapping table
 *                 OFF: The value in the next 14 bits is a delta value from the code point
101ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru *      2..15   -  Contains data as described by bit 1. If all bits are set
102ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru *                 (value = _SPREP_MAX_INDEX_VALUE) then the type is USPREP_DELETE
103ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru *
104ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru *
105ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru * Mapping Table:
106ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru * The data in mapping table is sorted according to the length of the mapping sequence.
107ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru * If the type of the code point is USPREP_MAP and value in trie word is an index, the index
108ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru * is compared with start indexes of sequence length start to figure out the length according to
109ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru * the following algorithm:
110ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru *
111ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru *              if(       index >= indexes[_SPREP_ONE_UCHAR_MAPPING_INDEX_START] &&
112ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru *                        index < indexes[_SPREP_TWO_UCHARS_MAPPING_INDEX_START]){
113ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru *                   length = 1;
114ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru *               }else if(index >= indexes[_SPREP_TWO_UCHARS_MAPPING_INDEX_START] &&
115ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru *                        index < indexes[_SPREP_THREE_UCHARS_MAPPING_INDEX_START]){
116ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru *                   length = 2;
117ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru *               }else if(index >= indexes[_SPREP_THREE_UCHARS_MAPPING_INDEX_START] &&
118ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru *                        index < indexes[_SPREP_FOUR_UCHARS_MAPPING_INDEX_START]){
119ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru *                   length = 3;
120ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru *               }else{
121ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru *                   // The first position in the mapping table contains the length
122ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru *                   // of the sequence
123ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru *                   length = mappingTable[index++];
124ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru *
125ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru *               }
126ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru *
127ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru */
128ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru
129ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru/* file data ---------------------------------------------------------------- */
130ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru/* indexes[] value names */
131ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru
132ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru#if UCONFIG_NO_IDNA
133ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru
134ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru/* dummy UDataInfo cf. udata.h */
135ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Querustatic UDataInfo dataInfo = {
136ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    sizeof(UDataInfo),
137ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    0,
138ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru
139ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    U_IS_BIG_ENDIAN,
140ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    U_CHARSET_FAMILY,
141ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    U_SIZEOF_UCHAR,
142ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    0,
143ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru
144ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    { 0, 0, 0, 0 },                 /* dummy dataFormat */
145ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    { 0, 0, 0, 0 },                 /* dummy formatVersion */
146ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    { 0, 0, 0, 0 }                  /* dummy dataVersion */
147ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru};
148ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru
149ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru#else
150ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru
151ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Querustatic int32_t indexes[_SPREP_INDEX_TOP]={ 0 };
152ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru
153ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Querustatic uint16_t* mappingData= NULL;
154ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Querustatic int32_t mappingDataCapacity = 0; /* we skip the first index in mapping data */
155ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Querustatic int16_t currentIndex = 0; /* the current index into the data trie */
156ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Querustatic int32_t maxLength = 0;  /* maximum length of mapping string */
157ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru
158ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru
159ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru/* UDataInfo cf. udata.h */
160ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Querustatic UDataInfo dataInfo={
161ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    sizeof(UDataInfo),
162ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    0,
163ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru
164ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    U_IS_BIG_ENDIAN,
165ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    U_CHARSET_FAMILY,
166ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    U_SIZEOF_UCHAR,
167ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    0,
168ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru
169ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    { 0x53, 0x50, 0x52, 0x50 },                 /* dataFormat="SPRP" */
170ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    { 3, 2, UTRIE_SHIFT, UTRIE_INDEX_SHIFT },   /* formatVersion */
171ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    { 3, 2, 0, 0 }                              /* dataVersion (Unicode version) */
172ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru};
173ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queruvoid
174ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste QuerusetUnicodeVersion(const char *v) {
175ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    UVersionInfo version;
176ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    u_versionFromString(version, v);
177ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    uprv_memcpy(dataInfo.dataVersion, version, 4);
178ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru}
179ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru
180ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queruvoid
181ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste QuerusetUnicodeVersionNC(UVersionInfo version){
182ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    uint32_t univer = version[0] << 24;
183ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    univer += version[1] << 16;
184ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    univer += version[2] << 8;
185ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    univer += version[3];
186ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    indexes[_SPREP_NORM_CORRECTNS_LAST_UNI_VERSION] = univer;
187ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru}
188ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Querustatic UNewTrie *sprepTrie;
189ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru
/* Maximum data length handed to utrie_open() when the build-time trie is created. */
#define MAX_DATA_LENGTH 11500


/*
 * Inclusive bounds of a signed 14-bit delta value.
 * The negative limit is parenthesized so that the macro expands to a single
 * primary expression wherever it is used.
 */
#define SPREP_DELTA_RANGE_POSITIVE_LIMIT              8191
#define SPREP_DELTA_RANGE_NEGATIVE_LIMIT              (-8192)
195ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru
196ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru
197ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queruextern void
198ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queruinit() {
199ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru
200103e9ffba2cba345d0078eb8b8db33249f81840aCraig Cornelius    sprepTrie = (UNewTrie *)uprv_calloc(1, sizeof(UNewTrie));
201ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru
202ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    /* initialize the two tries */
203ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    if(NULL==utrie_open(sprepTrie, NULL, MAX_DATA_LENGTH, 0, 0, FALSE)) {
204ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru        fprintf(stderr, "error: failed to initialize tries\n");
205ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru        exit(U_MEMORY_ALLOCATION_ERROR);
206ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    }
207ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru}
208ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru
209ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Querustatic UHashtable* hashTable = NULL;
210ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru
211ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru
212ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Querutypedef struct ValueStruct {
213ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    UChar* mapping;
214ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    int16_t length;
215ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    UStringPrepType type;
216ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru} ValueStruct;
217ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru
218ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru/* Callback for deleting the value from the hashtable */
219ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Querustatic void U_CALLCONV valueDeleter(void* obj){
220ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    ValueStruct* value = (ValueStruct*) obj;
221ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    uprv_free(value->mapping);
222ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    uprv_free(value);
223ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru}
224ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru
225ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru/* Callback for hashing the entry */
226ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Querustatic int32_t U_CALLCONV hashEntry(const UHashTok parm) {
227ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    return  parm.integer;
228ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru}
229ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru
230ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru/* Callback for comparing two entries */
231ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Querustatic UBool U_CALLCONV compareEntries(const UHashTok p1, const UHashTok p2) {
232ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    return (UBool)(p1.integer != p2.integer);
233ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru}
234ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru
235ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru
236ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Querustatic void
237ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste QuerustoreMappingData(){
238ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru
239ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    int32_t pos = -1;
240ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    const UHashElement* element = NULL;
241ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    ValueStruct* value  = NULL;
242ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    int32_t codepoint = 0;
24385bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho    int32_t elementCount = 0;
244ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    int32_t writtenElementCount = 0;
245ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    int32_t mappingLength = 1; /* minimum mapping length */
246ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    int32_t oldMappingLength = 0;
247ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    uint16_t trieWord =0;
248ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    int32_t limitIndex = 0;
249ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru
25085bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho    if (hashTable == NULL) {
25185bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho        return;
25285bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho    }
25385bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho    elementCount = uhash_count(hashTable);
25485bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho
25585bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho	/*initialize the mapping data */
256103e9ffba2cba345d0078eb8b8db33249f81840aCraig Cornelius    mappingData = (uint16_t*) uprv_calloc(mappingDataCapacity, U_SIZEOF_UCHAR);
257ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru
258ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    while(writtenElementCount < elementCount){
259ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru
260ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru        while( (element = uhash_nextElement(hashTable, &pos))!=NULL){
261ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru
262ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru            codepoint = element->key.integer;
263ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru            value = (ValueStruct*)element->value.pointer;
264ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru
265ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru            /* store the start of indexes */
266ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru            if(oldMappingLength != mappingLength){
267ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru                /* Assume that index[] is used according to the enums defined */
268ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru                if(oldMappingLength <=_SPREP_MAX_INDEX_TOP_LENGTH){
269ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru                    indexes[_SPREP_NORM_CORRECTNS_LAST_UNI_VERSION+mappingLength] = currentIndex;
270ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru                }
271ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru                if(oldMappingLength <= _SPREP_MAX_INDEX_TOP_LENGTH &&
272ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru                   mappingLength == _SPREP_MAX_INDEX_TOP_LENGTH +1){
273ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru
274ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru                    limitIndex = currentIndex;
275ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru
276ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru                }
277ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru                oldMappingLength = mappingLength;
278ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru            }
279ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru
280ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru            if(value->length == mappingLength){
281ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru                uint32_t savedTrieWord = 0;
282ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru                trieWord = currentIndex << 2;
283ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru                /* turn on the 2nd bit to signal that the following bits contain an index */
284ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru                trieWord += 0x02;
285ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru
286ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru                if(trieWord > _SPREP_TYPE_THRESHOLD){
287ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru                    fprintf(stderr,"trieWord cannot contain value greater than 0x%04X.\n",_SPREP_TYPE_THRESHOLD);
288ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru                    exit(U_ILLEGAL_CHAR_FOUND);
289ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru                }
290ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru                /* figure out if the code point has type already stored */
291ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru                savedTrieWord= utrie_get32(sprepTrie,codepoint,NULL);
292ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru                if(savedTrieWord!=0){
293ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru                    if((savedTrieWord- _SPREP_TYPE_THRESHOLD) == USPREP_PROHIBITED){
294ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru                        /* turn on the first bit in trie word */
295ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru                        trieWord += 0x01;
296ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru                    }else{
297ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru                        /*
298ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru                         * the codepoint has value something other than prohibited
299ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru                         * and a mapping .. error!
300ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru                         */
301ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru                        fprintf(stderr,"Type for codepoint \\U%08X already set!.\n", (int)codepoint);
302ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru                        exit(U_ILLEGAL_ARGUMENT_ERROR);
303ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru                    }
304ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru                }
305ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru
306ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru                /* now set the value in the trie */
307ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru                if(!utrie_set32(sprepTrie,codepoint,trieWord)){
308ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru                    fprintf(stderr,"Could not set the value for code point.\n");
309ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru                    exit(U_ILLEGAL_ARGUMENT_ERROR);
310ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru                }
311ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru
312ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru                /* written the trie word for the codepoint... increment the count*/
313ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru                writtenElementCount++;
314ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru
315ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru                /* sanity check are we exceeding the max number allowed */
316ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru                if(currentIndex+value->length+1 > _SPREP_MAX_INDEX_VALUE){
31754dcd9b6a06071f647dac967e9e267abb9410720Craig Cornelius                    fprintf(stderr, "Too many entries in the mapping table %i. Maximum allowed is %i\n",
31854dcd9b6a06071f647dac967e9e267abb9410720Craig Cornelius                        currentIndex+value->length, _SPREP_MAX_INDEX_VALUE);
319ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru                    exit(U_INDEX_OUTOFBOUNDS_ERROR);
320ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru                }
321ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru
322ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru                /* copy the mapping data */
32354dcd9b6a06071f647dac967e9e267abb9410720Craig Cornelius                /* write the length */
32454dcd9b6a06071f647dac967e9e267abb9410720Craig Cornelius                if(mappingLength > _SPREP_MAX_INDEX_TOP_LENGTH ){
32554dcd9b6a06071f647dac967e9e267abb9410720Craig Cornelius                     /* the cast here is safe since we donot expect the length to be > 65535 */
32654dcd9b6a06071f647dac967e9e267abb9410720Craig Cornelius                     mappingData[currentIndex++] = (uint16_t) mappingLength;
32754dcd9b6a06071f647dac967e9e267abb9410720Craig Cornelius                }
32854dcd9b6a06071f647dac967e9e267abb9410720Craig Cornelius                /* copy the contents to mappindData array */
32954dcd9b6a06071f647dac967e9e267abb9410720Craig Cornelius                uprv_memmove(mappingData+currentIndex, value->mapping, value->length*U_SIZEOF_UCHAR);
33054dcd9b6a06071f647dac967e9e267abb9410720Craig Cornelius                currentIndex += value->length;
33154dcd9b6a06071f647dac967e9e267abb9410720Craig Cornelius                if (currentIndex > mappingDataCapacity) {
33254dcd9b6a06071f647dac967e9e267abb9410720Craig Cornelius                    /* If this happens there is a bug in the computation of the mapping data size in storeMapping() */
33354dcd9b6a06071f647dac967e9e267abb9410720Craig Cornelius                    fprintf(stderr, "gensprep, fatal error at %s, %d.  Aborting.\n", __FILE__, __LINE__);
33454dcd9b6a06071f647dac967e9e267abb9410720Craig Cornelius                    exit(U_INTERNAL_PROGRAM_ERROR);
335ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru                }
336ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru            }
337ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru        }
338ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru        mappingLength++;
339ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru        pos = -1;
340ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    }
341ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    /* set the last length for range check */
342ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    if(mappingLength <= _SPREP_MAX_INDEX_TOP_LENGTH){
343ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru        indexes[_SPREP_NORM_CORRECTNS_LAST_UNI_VERSION+mappingLength] = currentIndex+1;
344ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    }else{
345ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru        indexes[_SPREP_FOUR_UCHARS_MAPPING_INDEX_START] = limitIndex;
346ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    }
347ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru
348ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru}
349ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru
350ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queruextern void setOptions(int32_t options){
351ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    indexes[_SPREP_OPTIONS] = options;
352ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru}
353ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queruextern void
354ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste QuerustoreMapping(uint32_t codepoint, uint32_t* mapping,int32_t length,
355ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru             UStringPrepType type, UErrorCode* status){
356ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru
357ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru
358ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    UChar* map = NULL;
35954dcd9b6a06071f647dac967e9e267abb9410720Craig Cornelius    int16_t adjustedLen=0, i, j;
360ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    uint16_t trieWord = 0;
361ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    ValueStruct *value = NULL;
362ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    uint32_t savedTrieWord = 0;
363ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru
364ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    /* initialize the hashtable */
365ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    if(hashTable==NULL){
366ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru        hashTable = uhash_open(hashEntry, compareEntries, NULL, status);
367ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru        uhash_setValueDeleter(hashTable, valueDeleter);
368ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    }
369ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru
370ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    /* figure out if the code point has type already stored */
371ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    savedTrieWord= utrie_get32(sprepTrie,codepoint,NULL);
372ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    if(savedTrieWord!=0){
373ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru        if((savedTrieWord- _SPREP_TYPE_THRESHOLD) == USPREP_PROHIBITED){
374ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru            /* turn on the first bit in trie word */
375ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru            trieWord += 0x01;
376ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru        }else{
377ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru            /*
378ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru             * the codepoint has value something other than prohibited
379ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru             * and a mapping .. error!
380ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru             */
381ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru            fprintf(stderr,"Type for codepoint \\U%08X already set!.\n", (int)codepoint);
382ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru            exit(U_ILLEGAL_ARGUMENT_ERROR);
383ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru        }
384ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    }
385ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru
386ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    /* figure out the real length */
387ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    for(i=0; i<length; i++){
38854dcd9b6a06071f647dac967e9e267abb9410720Craig Cornelius        adjustedLen += U16_LENGTH(mapping[i]);
389ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    }
390ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru
391ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    if(adjustedLen == 0){
392ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru        trieWord = (uint16_t)(_SPREP_MAX_INDEX_VALUE << 2);
393ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru        /* make sure that the value of trieWord is less than the threshold */
394ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru        if(trieWord < _SPREP_TYPE_THRESHOLD){
395ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru            /* now set the value in the trie */
396ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru            if(!utrie_set32(sprepTrie,codepoint,trieWord)){
397ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru                fprintf(stderr,"Could not set the value for code point.\n");
398ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru                exit(U_ILLEGAL_ARGUMENT_ERROR);
399ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru            }
400ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru            /* value is set so just return */
401ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru            return;
402ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru        }else{
403ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru            fprintf(stderr,"trieWord cannot contain value greater than threshold 0x%04X.\n",_SPREP_TYPE_THRESHOLD);
404ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru            exit(U_ILLEGAL_CHAR_FOUND);
405ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru        }
406ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    }
407ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru
408ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    if(adjustedLen == 1){
409ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru        /* calculate the delta */
410ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru        int16_t delta = (int16_t)((int32_t)codepoint - (int16_t) mapping[0]);
411ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru        if(delta >= SPREP_DELTA_RANGE_NEGATIVE_LIMIT && delta <= SPREP_DELTA_RANGE_POSITIVE_LIMIT){
412ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru
413ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru            trieWord = delta << 2;
414ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru
415ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru
416ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru            /* make sure that the second bit is OFF */
417ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru            if((trieWord & 0x02) != 0 ){
418ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru                fprintf(stderr,"The second bit in the trie word is not zero while storing a delta.\n");
419ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru                exit(U_INTERNAL_PROGRAM_ERROR);
420ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru            }
421ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru            /* make sure that the value of trieWord is less than the threshold */
422ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru            if(trieWord < _SPREP_TYPE_THRESHOLD){
423ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru                /* now set the value in the trie */
424ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru                if(!utrie_set32(sprepTrie,codepoint,trieWord)){
425ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru                    fprintf(stderr,"Could not set the value for code point.\n");
426ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru                    exit(U_ILLEGAL_ARGUMENT_ERROR);
427ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru                }
428ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru                /* value is set so just return */
429ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru                return;
430ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru            }
431ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru        }
432ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru        /*
433ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru         * if the delta is not in the given range or if the trieWord is larger than the threshold
434ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru         * just fall through for storing the mapping in the mapping table
435ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru         */
436ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    }
437ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru
438103e9ffba2cba345d0078eb8b8db33249f81840aCraig Cornelius    map = (UChar*) uprv_calloc(adjustedLen + 1, U_SIZEOF_UCHAR);
439ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru
44054dcd9b6a06071f647dac967e9e267abb9410720Craig Cornelius    for (i=0, j=0; i<length; i++) {
44154dcd9b6a06071f647dac967e9e267abb9410720Craig Cornelius        U16_APPEND_UNSAFE(map, j, mapping[i]);
442ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    }
443ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru
444ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    value = (ValueStruct*) uprv_malloc(sizeof(ValueStruct));
445ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    value->mapping = map;
44654dcd9b6a06071f647dac967e9e267abb9410720Craig Cornelius    value->type    = type;
447ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    value->length  = adjustedLen;
448ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    if(value->length > _SPREP_MAX_INDEX_TOP_LENGTH){
449ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru        mappingDataCapacity++;
450ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    }
451ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    if(maxLength < value->length){
452ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru        maxLength = value->length;
453ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    }
454ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    uhash_iput(hashTable,codepoint,value,status);
455ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    mappingDataCapacity += adjustedLen;
456ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru
457ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    if(U_FAILURE(*status)){
458ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru        fprintf(stderr, "Failed to put entries into the hastable. Error: %s\n", u_errorName(*status));
459ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru        exit(*status);
460ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    }
461ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru}
462ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru
463ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru
464ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queruextern void
465ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste QuerustoreRange(uint32_t start, uint32_t end, UStringPrepType type,UErrorCode* status){
466ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    uint16_t trieWord = 0;
467ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru
468ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    if((int)(_SPREP_TYPE_THRESHOLD + type) > 0xFFFF){
469ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru        fprintf(stderr,"trieWord cannot contain value greater than 0xFFFF.\n");
470ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru        exit(U_ILLEGAL_CHAR_FOUND);
471ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    }
472ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    trieWord = (_SPREP_TYPE_THRESHOLD + type); /* the top 4 bits contain the value */
473ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    if(start == end){
474ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru        uint32_t savedTrieWord = utrie_get32(sprepTrie, start, NULL);
475ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru        if(savedTrieWord>0){
476ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru            if(savedTrieWord < _SPREP_TYPE_THRESHOLD && type == USPREP_PROHIBITED){
477ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru                /*
478ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru                 * A mapping is stored in the trie word
479ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru                 * and the only other possible type that a
480ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru                 * code point can have is USPREP_PROHIBITED
481ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru                 *
482ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru                 */
483ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru
484ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru                /* turn on the 0th bit in the savedTrieWord */
485ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru                savedTrieWord += 0x01;
486ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru
487ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru                /* the downcast is safe since we only save 16 bit values */
488ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru                trieWord = (uint16_t)savedTrieWord;
489ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru
490ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru                /* make sure that the value of trieWord is less than the threshold */
491ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru                if(trieWord < _SPREP_TYPE_THRESHOLD){
492ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru                    /* now set the value in the trie */
493ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru                    if(!utrie_set32(sprepTrie,start,trieWord)){
494ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru                        fprintf(stderr,"Could not set the value for code point.\n");
495ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru                        exit(U_ILLEGAL_ARGUMENT_ERROR);
496ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru                    }
497ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru                    /* value is set so just return */
498ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru                    return;
499ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru                }else{
500ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru                    fprintf(stderr,"trieWord cannot contain value greater than threshold 0x%04X.\n",_SPREP_TYPE_THRESHOLD);
501ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru                    exit(U_ILLEGAL_CHAR_FOUND);
502ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru                }
503ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru
504ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru            }else if(savedTrieWord != trieWord){
505ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru                fprintf(stderr,"Value for codepoint \\U%08X already set!.\n", (int)start);
506ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru                exit(U_ILLEGAL_ARGUMENT_ERROR);
507ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru            }
508ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru            /* if savedTrieWord == trieWord .. fall through and set the value */
509ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru        }
510ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru        if(!utrie_set32(sprepTrie,start,trieWord)){
511ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru            fprintf(stderr,"Could not set the value for code point \\U%08X.\n", (int)start);
512ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru            exit(U_ILLEGAL_ARGUMENT_ERROR);
513ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru        }
514ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    }else{
515ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru        if(!utrie_setRange32(sprepTrie, start, end+1, trieWord, FALSE)){
516ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru            fprintf(stderr,"Value for certain codepoint already set.\n");
517ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru            exit(U_ILLEGAL_CHAR_FOUND);
518ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru        }
519ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    }
520ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru
521ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru}
522ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru
523ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru/* folding value: just store the offset (16 bits) if there is any non-0 entry */
524ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Querustatic uint32_t U_CALLCONV
525ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste QuerugetFoldedValue(UNewTrie *trie, UChar32 start, int32_t offset) {
526103e9ffba2cba345d0078eb8b8db33249f81840aCraig Cornelius    uint32_t value;
527ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    UChar32 limit=0;
528ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    UBool inBlockZero;
529ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru
530ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    limit=start+0x400;
531ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    while(start<limit) {
532ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru        value=utrie_get32(trie, start, &inBlockZero);
533ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru        if(inBlockZero) {
534ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru            start+=UTRIE_DATA_BLOCK_LENGTH;
535ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru        } else if(value!=0) {
536ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru            return (uint32_t)offset;
537ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru        } else {
538ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru            ++start;
539ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru        }
540ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    }
541ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    return 0;
542ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru
543ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru}
544ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru
545ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru#endif /* #if !UCONFIG_NO_IDNA */
546ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru
547ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queruextern void
548ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste QuerugenerateData(const char *dataDir, const char* bundleName) {
549ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    static uint8_t sprepTrieBlock[100000];
550ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru
551ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    UNewDataMemory *pData;
552ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    UErrorCode errorCode=U_ZERO_ERROR;
553ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    int32_t size, dataLength;
554ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    char* fileName = (char*) uprv_malloc(uprv_strlen(bundleName) +100);
555ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru
556ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru#if UCONFIG_NO_IDNA
557ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru
558ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    size=0;
559ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru
560ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru#else
561ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru
562ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    int32_t sprepTrieSize;
563ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru
564ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    /* sort and add mapping data */
565ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    storeMappingData();
566ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru
567ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    sprepTrieSize=utrie_serialize(sprepTrie, sprepTrieBlock, sizeof(sprepTrieBlock), getFoldedValue, TRUE, &errorCode);
568ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    if(U_FAILURE(errorCode)) {
569ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru        fprintf(stderr, "error: utrie_serialize(sprep trie) failed, %s\n", u_errorName(errorCode));
570ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru        exit(errorCode);
571ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    }
572ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru
573ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    size = sprepTrieSize + mappingDataCapacity*U_SIZEOF_UCHAR + sizeof(indexes);
574ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    if(beVerbose) {
575ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru        printf("size of sprep trie              %5u bytes\n", (int)sprepTrieSize);
576ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru        printf("size of " U_ICUDATA_NAME "_%s." DATA_TYPE " contents: %ld bytes\n", bundleName,(long)size);
577ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru        printf("size of mapping data array %5u bytes\n",(int)mappingDataCapacity * U_SIZEOF_UCHAR);
578ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru        printf("Number of code units in mappingData (currentIndex) are: %i \n", currentIndex);
579ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru        printf("Maximum length of the mapping string is : %i \n", (int)maxLength);
580ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    }
581ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru
582ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru#endif
583ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru
584ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    fileName[0]=0;
585ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    uprv_strcat(fileName,bundleName);
586ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    /* write the data */
587ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    pData=udata_create(dataDir, DATA_TYPE, fileName, &dataInfo,
588ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru                       haveCopyright ? U_COPYRIGHT_STRING : NULL, &errorCode);
589ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    if(U_FAILURE(errorCode)) {
590ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru        fprintf(stderr, "gensprep: unable to create the output file, error %d\n", errorCode);
591ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru        exit(errorCode);
592ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    }
593ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru
594ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru#if !UCONFIG_NO_IDNA
595ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru
596ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    indexes[_SPREP_INDEX_TRIE_SIZE]=sprepTrieSize;
597ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    indexes[_SPREP_INDEX_MAPPING_DATA_SIZE]=mappingDataCapacity*U_SIZEOF_UCHAR;
598ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru
599ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    udata_writeBlock(pData, indexes, sizeof(indexes));
600ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    udata_writeBlock(pData, sprepTrieBlock, sprepTrieSize);
601ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    udata_writeBlock(pData, mappingData, indexes[_SPREP_INDEX_MAPPING_DATA_SIZE]);
602ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru
603ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru
604ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru#endif
605ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru
606ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    /* finish up */
607ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    dataLength=udata_finish(pData, &errorCode);
608ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    if(U_FAILURE(errorCode)) {
609ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru        fprintf(stderr, "gensprep: error %d writing the output file\n", errorCode);
610ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru        exit(errorCode);
611ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    }
612ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru
613ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    if(dataLength!=size) {
614ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru        fprintf(stderr, "gensprep error: data length %ld != calculated size %ld\n",
615ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru            (long)dataLength, (long)size);
616ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru        exit(U_INTERNAL_PROGRAM_ERROR);
617ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    }
618ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru
619ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru#if !UCONFIG_NO_IDNA
620ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    /* done with writing the data .. close the hashtable */
62185bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho    if (hashTable != NULL) {
62285bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho        uhash_close(hashTable);
62385bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho    }
624ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru#endif
62554dcd9b6a06071f647dac967e9e267abb9410720Craig Cornelius
62654dcd9b6a06071f647dac967e9e267abb9410720Craig Cornelius    uprv_free(fileName);
627ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru}
628ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru
629ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru#if !UCONFIG_NO_IDNA
630ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru
631ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queruextern void
632ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste QuerucleanUpData(void) {
63354dcd9b6a06071f647dac967e9e267abb9410720Craig Cornelius    uprv_free(mappingData);
634ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    utrie_close(sprepTrie);
635ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru    uprv_free(sprepTrie);
636ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru}
637ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru
638ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru#endif /* #if !UCONFIG_NO_IDNA */
639ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru
640ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru/*
641ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru * Hey, Emacs, please set the following:
642ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru *
643ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru * Local Variables:
644ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru * indent-tabs-mode: nil
645ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru * End:
646ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru *
647ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru */
648