1/*
2*******************************************************************************
3*
4*   Copyright (C) 1999-2009, International Business Machines
5*   Corporation and others.  All Rights Reserved.
6*
7*******************************************************************************
8*   file name:  store.c
9*   encoding:   US-ASCII
10*   tab size:   8 (not used)
11*   indentation:4
12*
13*   created on: 2003-02-06
14*   created by: Ram Viswanadha
15*
16*/
17
18#include <stdio.h>
19#include <stdlib.h>
20#include "unicode/utypes.h"
21#include "cmemory.h"
22#include "cstring.h"
23#include "filestrm.h"
24#include "unicode/udata.h"
25#include "utrie.h"
26#include "unewdata.h"
27#include "gensprep.h"
28#include "uhash.h"
29
30
31#define DO_DEBUG_OUT 0
32
33
34/*
35 * StringPrep profile file format ------------------------------------
36 *
37 * The file format prepared and written here contains a 16-bit trie and a mapping table.
38 *
39 * Before the data contents described below, there are the headers required by
40 * the udata API for loading ICU data. Especially, a UDataInfo structure
41 * precedes the actual data. It contains platform properties values and the
42 * file format version.
43 *
44 * The following is a description of format version 2.
45 *
46 * Data contents:
47 *
 * The content is a parsed, binary form of RFC3454 and possibly
49 * NormalizationCorrections.txt depending on the options specified on the profile.
50 *
51 * Any Unicode code point from 0 to 0x10ffff can be looked up to get
52 * the trie-word, if any, for that code point. This means that the input
53 * to the lookup are 21-bit unsigned integers, with not all of the
54 * 21-bit range used.
55 *
56 * *.spp files customarily begin with a UDataInfo structure, see udata.h and .c.
57 * After that there are the following structures:
58 *
59 * int32_t indexes[_SPREP_INDEX_TOP];           -- _SPREP_INDEX_TOP=16, see enum in sprpimpl.h file
60 *
61 * UTrie stringPrepTrie;                        -- size in bytes=indexes[_SPREP_INDEX_TRIE_SIZE]
62 *
 * uint16_t mappingTable[];                     -- Contains the sequence of code units that the code point maps to
64 *                                                 size in bytes = indexes[_SPREP_INDEX_MAPPING_DATA_SIZE]
65 *
66 * The indexes array contains the following values:
67 *  indexes[_SPREP_INDEX_TRIE_SIZE]                  -- The size of the StringPrep trie in bytes
68 *  indexes[_SPREP_INDEX_MAPPING_DATA_SIZE]          -- The size of the mappingTable in bytes
69 *  indexes[_SPREP_NORM_CORRECTNS_LAST_UNI_VERSION]  -- The index of Unicode version of last entry in NormalizationCorrections.txt
70 *  indexes[_SPREP_ONE_UCHAR_MAPPING_INDEX_START]    -- The starting index of 1 UChar  mapping index in the mapping table
71 *  indexes[_SPREP_TWO_UCHARS_MAPPING_INDEX_START]   -- The starting index of 2 UChars mapping index in the mapping table
72 *  indexes[_SPREP_THREE_UCHARS_MAPPING_INDEX_START] -- The starting index of 3 UChars mapping index in the mapping table
73 *  indexes[_SPREP_FOUR_UCHARS_MAPPING_INDEX_START]  -- The starting index of 4 UChars mapping index in the mapping table
74 *  indexes[_SPREP_OPTIONS]                          -- Bit set of options to turn on in the profile, e.g: USPREP_NORMALIZATION_ON, USPREP_CHECK_BIDI_ON
75 *
76 *
77 * StringPrep Trie :
78 *
 * The StringPrep trie is a 16-bit trie that contains data for the profile.
80 * Each code point is associated with a value (trie-word) in the trie.
81 *
82 * - structure of data words from the trie
83 *
84 *  i)  A value greater than or equal to _SPREP_TYPE_THRESHOLD (0xFFF0)
85 *      represents the type associated with the code point
86 *      if(trieWord >= _SPREP_TYPE_THRESHOLD){
87 *          type = trieWord - 0xFFF0;
88 *      }
89 *      The type can be :
90 *             USPREP_UNASSIGNED
91 *             USPREP_PROHIBITED
92 *             USPREP_DELETE
93 *
94 *  ii) A value less than _SPREP_TYPE_THRESHOLD means the type is USPREP_MAP and
95 *      contains distribution described below
96 *
97 *      0       -  ON : The code point is prohibited (USPREP_PROHIBITED). This is to allow for codepoint that are both prohibited and mapped.
98 *      1       -  ON : The value in the next 14 bits is an index into the mapping table
 *                 OFF: The value in the next 14 bits is a delta value from the code point
100 *      2..15   -  Contains data as described by bit 1. If all bits are set
101 *                 (value = _SPREP_MAX_INDEX_VALUE) then the type is USPREP_DELETE
102 *
103 *
104 * Mapping Table:
105 * The data in mapping table is sorted according to the length of the mapping sequence.
106 * If the type of the code point is USPREP_MAP and value in trie word is an index, the index
107 * is compared with start indexes of sequence length start to figure out the length according to
108 * the following algorithm:
109 *
110 *              if(       index >= indexes[_SPREP_ONE_UCHAR_MAPPING_INDEX_START] &&
111 *                        index < indexes[_SPREP_TWO_UCHARS_MAPPING_INDEX_START]){
112 *                   length = 1;
113 *               }else if(index >= indexes[_SPREP_TWO_UCHARS_MAPPING_INDEX_START] &&
114 *                        index < indexes[_SPREP_THREE_UCHARS_MAPPING_INDEX_START]){
115 *                   length = 2;
116 *               }else if(index >= indexes[_SPREP_THREE_UCHARS_MAPPING_INDEX_START] &&
117 *                        index < indexes[_SPREP_FOUR_UCHARS_MAPPING_INDEX_START]){
118 *                   length = 3;
119 *               }else{
120 *                   // The first position in the mapping table contains the length
121 *                   // of the sequence
122 *                   length = mappingTable[index++];
123 *
124 *               }
125 *
126 */
127
128/* file data ---------------------------------------------------------------- */
129/* indexes[] value names */
130
131#if UCONFIG_NO_IDNA
132
133/* dummy UDataInfo cf. udata.h */
134static UDataInfo dataInfo = {
135    sizeof(UDataInfo),
136    0,
137
138    U_IS_BIG_ENDIAN,
139    U_CHARSET_FAMILY,
140    U_SIZEOF_UCHAR,
141    0,
142
143    { 0, 0, 0, 0 },                 /* dummy dataFormat */
144    { 0, 0, 0, 0 },                 /* dummy formatVersion */
145    { 0, 0, 0, 0 }                  /* dummy dataVersion */
146};
147
148#else
149
150static int32_t indexes[_SPREP_INDEX_TOP]={ 0 };
151
152static uint16_t* mappingData= NULL;
153static int32_t mappingDataCapacity = 0; /* we skip the first index in mapping data */
154static int16_t currentIndex = 0; /* the current index into the data trie */
155static int32_t maxLength = 0;  /* maximum length of mapping string */
156
157
158/* UDataInfo cf. udata.h */
159static UDataInfo dataInfo={
160    sizeof(UDataInfo),
161    0,
162
163    U_IS_BIG_ENDIAN,
164    U_CHARSET_FAMILY,
165    U_SIZEOF_UCHAR,
166    0,
167
168    { 0x53, 0x50, 0x52, 0x50 },                 /* dataFormat="SPRP" */
169    { 3, 2, UTRIE_SHIFT, UTRIE_INDEX_SHIFT },   /* formatVersion */
170    { 3, 2, 0, 0 }                              /* dataVersion (Unicode version) */
171};
172void
173setUnicodeVersion(const char *v) {
174    UVersionInfo version;
175    u_versionFromString(version, v);
176    uprv_memcpy(dataInfo.dataVersion, version, 4);
177}
178
179void
180setUnicodeVersionNC(UVersionInfo version){
181    uint32_t univer = version[0] << 24;
182    univer += version[1] << 16;
183    univer += version[2] << 8;
184    univer += version[3];
185    indexes[_SPREP_NORM_CORRECTNS_LAST_UNI_VERSION] = univer;
186}
187static UNewTrie *sprepTrie;
188
/* maximum data length passed to utrie_open() for the build-time trie */
#define MAX_DATA_LENGTH 11500


/*
 * Inclusive range of deltas (code point minus mapped code point) that
 * storeMapping() encodes directly in a trie word.
 * The negative value is parenthesized so the macro expands safely inside
 * larger expressions.
 */
#define SPREP_DELTA_RANGE_POSITIVE_LIMIT              8191
#define SPREP_DELTA_RANGE_NEGATIVE_LIMIT              (-8192)
194
195
196extern void
197init() {
198
199    sprepTrie = (UNewTrie *)uprv_malloc(sizeof(UNewTrie));
200    uprv_memset(sprepTrie, 0, sizeof(UNewTrie));
201
202    /* initialize the two tries */
203    if(NULL==utrie_open(sprepTrie, NULL, MAX_DATA_LENGTH, 0, 0, FALSE)) {
204        fprintf(stderr, "error: failed to initialize tries\n");
205        exit(U_MEMORY_ALLOCATION_ERROR);
206    }
207}
208
209static UHashtable* hashTable = NULL;
210
211
212typedef struct ValueStruct {
213    UChar* mapping;
214    int16_t length;
215    UStringPrepType type;
216} ValueStruct;
217
218/* Callback for deleting the value from the hashtable */
219static void U_CALLCONV valueDeleter(void* obj){
220    ValueStruct* value = (ValueStruct*) obj;
221    uprv_free(value->mapping);
222    uprv_free(value);
223}
224
225/* Callback for hashing the entry */
226static int32_t U_CALLCONV hashEntry(const UHashTok parm) {
227    return  parm.integer;
228}
229
230/* Callback for comparing two entries */
231static UBool U_CALLCONV compareEntries(const UHashTok p1, const UHashTok p2) {
232    return (UBool)(p1.integer != p2.integer);
233}
234
235
236static void
237storeMappingData(){
238
239    int32_t pos = -1;
240    const UHashElement* element = NULL;
241    ValueStruct* value  = NULL;
242    int32_t codepoint = 0;
243    int32_t elementCount = 0;
244    int32_t writtenElementCount = 0;
245    int32_t mappingLength = 1; /* minimum mapping length */
246    int32_t oldMappingLength = 0;
247    uint16_t trieWord =0;
248    int32_t limitIndex = 0;
249
250    if (hashTable == NULL) {
251        return;
252    }
253    elementCount = uhash_count(hashTable);
254
255	/*initialize the mapping data */
256    mappingData = (uint16_t*) uprv_malloc(U_SIZEOF_UCHAR * (mappingDataCapacity));
257
258    uprv_memset(mappingData,0,U_SIZEOF_UCHAR * mappingDataCapacity);
259
260    while(writtenElementCount < elementCount){
261
262        while( (element = uhash_nextElement(hashTable, &pos))!=NULL){
263
264            codepoint = element->key.integer;
265            value = (ValueStruct*)element->value.pointer;
266
267            /* store the start of indexes */
268            if(oldMappingLength != mappingLength){
269                /* Assume that index[] is used according to the enums defined */
270                if(oldMappingLength <=_SPREP_MAX_INDEX_TOP_LENGTH){
271                    indexes[_SPREP_NORM_CORRECTNS_LAST_UNI_VERSION+mappingLength] = currentIndex;
272                }
273                if(oldMappingLength <= _SPREP_MAX_INDEX_TOP_LENGTH &&
274                   mappingLength == _SPREP_MAX_INDEX_TOP_LENGTH +1){
275
276                    limitIndex = currentIndex;
277
278                }
279                oldMappingLength = mappingLength;
280            }
281
282            if(value->length == mappingLength){
283                uint32_t savedTrieWord = 0;
284                trieWord = currentIndex << 2;
285                /* turn on the 2nd bit to signal that the following bits contain an index */
286                trieWord += 0x02;
287
288                if(trieWord > _SPREP_TYPE_THRESHOLD){
289                    fprintf(stderr,"trieWord cannot contain value greater than 0x%04X.\n",_SPREP_TYPE_THRESHOLD);
290                    exit(U_ILLEGAL_CHAR_FOUND);
291                }
292                /* figure out if the code point has type already stored */
293                savedTrieWord= utrie_get32(sprepTrie,codepoint,NULL);
294                if(savedTrieWord!=0){
295                    if((savedTrieWord- _SPREP_TYPE_THRESHOLD) == USPREP_PROHIBITED){
296                        /* turn on the first bit in trie word */
297                        trieWord += 0x01;
298                    }else{
299                        /*
300                         * the codepoint has value something other than prohibited
301                         * and a mapping .. error!
302                         */
303                        fprintf(stderr,"Type for codepoint \\U%08X already set!.\n", (int)codepoint);
304                        exit(U_ILLEGAL_ARGUMENT_ERROR);
305                    }
306                }
307
308                /* now set the value in the trie */
309                if(!utrie_set32(sprepTrie,codepoint,trieWord)){
310                    fprintf(stderr,"Could not set the value for code point.\n");
311                    exit(U_ILLEGAL_ARGUMENT_ERROR);
312                }
313
314                /* written the trie word for the codepoint... increment the count*/
315                writtenElementCount++;
316
317                /* sanity check are we exceeding the max number allowed */
318                if(currentIndex+value->length+1 > _SPREP_MAX_INDEX_VALUE){
319                    fprintf(stderr, "Too many entries in the mapping table %i. Maximum allowed is %i\n", currentIndex+value->length, _SPREP_MAX_INDEX_VALUE);
320                    exit(U_INDEX_OUTOFBOUNDS_ERROR);
321                }
322
323                /* copy the mapping data */
324                if(currentIndex+value->length+1 <= mappingDataCapacity){
325                    /* write the length */
326                    if(mappingLength > _SPREP_MAX_INDEX_TOP_LENGTH ){
327                         /* the cast here is safe since we donot expect the length to be > 65535 */
328                         mappingData[currentIndex++] = (uint16_t) mappingLength;
329                    }
330                    /* copy the contents to mappindData array */
331                    uprv_memmove(mappingData+currentIndex, value->mapping, value->length*U_SIZEOF_UCHAR);
332                    currentIndex += value->length;
333
334                }else{
335                    /* realloc */
336                    UChar* newMappingData = (uint16_t*) uprv_malloc(U_SIZEOF_UCHAR * mappingDataCapacity*2);
337                    if(newMappingData == NULL){
338                        fprintf(stderr, "Could not realloc the mapping data!\n");
339                        exit(U_MEMORY_ALLOCATION_ERROR);
340                    }
341                    uprv_memmove(newMappingData, mappingData, U_SIZEOF_UCHAR * mappingDataCapacity);
342                    mappingDataCapacity *= 2;
343                    uprv_free(mappingData);
344                    mappingData = newMappingData;
345                    /* write the length */
346                    if(mappingLength > _SPREP_MAX_INDEX_TOP_LENGTH ){
347                         /* the cast here is safe since we donot expect the length to be > 65535 */
348                         mappingData[currentIndex++] = (uint16_t) mappingLength;
349                    }
350                    /* continue copying */
351                    uprv_memmove(mappingData+currentIndex, value->mapping, value->length*U_SIZEOF_UCHAR);
352                    currentIndex += value->length;
353                }
354
355            }
356        }
357        mappingLength++;
358        pos = -1;
359    }
360    /* set the last length for range check */
361    if(mappingLength <= _SPREP_MAX_INDEX_TOP_LENGTH){
362        indexes[_SPREP_NORM_CORRECTNS_LAST_UNI_VERSION+mappingLength] = currentIndex+1;
363    }else{
364        indexes[_SPREP_FOUR_UCHARS_MAPPING_INDEX_START] = limitIndex;
365    }
366
367}
368
369extern void setOptions(int32_t options){
370    indexes[_SPREP_OPTIONS] = options;
371}
372extern void
373storeMapping(uint32_t codepoint, uint32_t* mapping,int32_t length,
374             UStringPrepType type, UErrorCode* status){
375
376
377    UChar* map = NULL;
378    int16_t adjustedLen=0, i;
379    uint16_t trieWord = 0;
380    ValueStruct *value = NULL;
381    uint32_t savedTrieWord = 0;
382
383    /* initialize the hashtable */
384    if(hashTable==NULL){
385        hashTable = uhash_open(hashEntry, compareEntries, NULL, status);
386        uhash_setValueDeleter(hashTable, valueDeleter);
387    }
388
389    /* figure out if the code point has type already stored */
390    savedTrieWord= utrie_get32(sprepTrie,codepoint,NULL);
391    if(savedTrieWord!=0){
392        if((savedTrieWord- _SPREP_TYPE_THRESHOLD) == USPREP_PROHIBITED){
393            /* turn on the first bit in trie word */
394            trieWord += 0x01;
395        }else{
396            /*
397             * the codepoint has value something other than prohibited
398             * and a mapping .. error!
399             */
400            fprintf(stderr,"Type for codepoint \\U%08X already set!.\n", (int)codepoint);
401            exit(U_ILLEGAL_ARGUMENT_ERROR);
402        }
403    }
404
405    /* figure out the real length */
406    for(i=0; i<length; i++){
407        if(mapping[i] > 0xFFFF){
408            adjustedLen +=2;
409        }else{
410            adjustedLen++;
411        }
412    }
413
414    if(adjustedLen == 0){
415        trieWord = (uint16_t)(_SPREP_MAX_INDEX_VALUE << 2);
416        /* make sure that the value of trieWord is less than the threshold */
417        if(trieWord < _SPREP_TYPE_THRESHOLD){
418            /* now set the value in the trie */
419            if(!utrie_set32(sprepTrie,codepoint,trieWord)){
420                fprintf(stderr,"Could not set the value for code point.\n");
421                exit(U_ILLEGAL_ARGUMENT_ERROR);
422            }
423            /* value is set so just return */
424            return;
425        }else{
426            fprintf(stderr,"trieWord cannot contain value greater than threshold 0x%04X.\n",_SPREP_TYPE_THRESHOLD);
427            exit(U_ILLEGAL_CHAR_FOUND);
428        }
429    }
430
431    if(adjustedLen == 1){
432        /* calculate the delta */
433        int16_t delta = (int16_t)((int32_t)codepoint - (int16_t) mapping[0]);
434        if(delta >= SPREP_DELTA_RANGE_NEGATIVE_LIMIT && delta <= SPREP_DELTA_RANGE_POSITIVE_LIMIT){
435
436            trieWord = delta << 2;
437
438
439            /* make sure that the second bit is OFF */
440            if((trieWord & 0x02) != 0 ){
441                fprintf(stderr,"The second bit in the trie word is not zero while storing a delta.\n");
442                exit(U_INTERNAL_PROGRAM_ERROR);
443            }
444            /* make sure that the value of trieWord is less than the threshold */
445            if(trieWord < _SPREP_TYPE_THRESHOLD){
446                /* now set the value in the trie */
447                if(!utrie_set32(sprepTrie,codepoint,trieWord)){
448                    fprintf(stderr,"Could not set the value for code point.\n");
449                    exit(U_ILLEGAL_ARGUMENT_ERROR);
450                }
451                /* value is set so just return */
452                return;
453            }
454        }
455        /*
456         * if the delta is not in the given range or if the trieWord is larger than the threshold
457         * just fall through for storing the mapping in the mapping table
458         */
459    }
460
461    map = (UChar*) uprv_malloc(U_SIZEOF_UCHAR * (adjustedLen+1));
462    uprv_memset(map,0,U_SIZEOF_UCHAR * (adjustedLen+1));
463
464    i=0;
465
466    while(i<length){
467        if(mapping[i] <= 0xFFFF){
468            map[i] = (uint16_t)mapping[i];
469        }else{
470            map[i]   = UTF16_LEAD(mapping[i]);
471            map[i+1] = UTF16_TRAIL(mapping[i]);
472        }
473        i++;
474    }
475
476    value = (ValueStruct*) uprv_malloc(sizeof(ValueStruct));
477    value->mapping = map;
478    value->type   = type;
479    value->length  = adjustedLen;
480    if(value->length > _SPREP_MAX_INDEX_TOP_LENGTH){
481        mappingDataCapacity++;
482    }
483    if(maxLength < value->length){
484        maxLength = value->length;
485    }
486    uhash_iput(hashTable,codepoint,value,status);
487    mappingDataCapacity += adjustedLen;
488
489    if(U_FAILURE(*status)){
490        fprintf(stderr, "Failed to put entries into the hastable. Error: %s\n", u_errorName(*status));
491        exit(*status);
492    }
493}
494
495
496extern void
497storeRange(uint32_t start, uint32_t end, UStringPrepType type,UErrorCode* status){
498    uint16_t trieWord = 0;
499
500    if((int)(_SPREP_TYPE_THRESHOLD + type) > 0xFFFF){
501        fprintf(stderr,"trieWord cannot contain value greater than 0xFFFF.\n");
502        exit(U_ILLEGAL_CHAR_FOUND);
503    }
504    trieWord = (_SPREP_TYPE_THRESHOLD + type); /* the top 4 bits contain the value */
505    if(start == end){
506        uint32_t savedTrieWord = utrie_get32(sprepTrie, start, NULL);
507        if(savedTrieWord>0){
508            if(savedTrieWord < _SPREP_TYPE_THRESHOLD && type == USPREP_PROHIBITED){
509                /*
510                 * A mapping is stored in the trie word
511                 * and the only other possible type that a
512                 * code point can have is USPREP_PROHIBITED
513                 *
514                 */
515
516                /* turn on the 0th bit in the savedTrieWord */
517                savedTrieWord += 0x01;
518
519                /* the downcast is safe since we only save 16 bit values */
520                trieWord = (uint16_t)savedTrieWord;
521
522                /* make sure that the value of trieWord is less than the threshold */
523                if(trieWord < _SPREP_TYPE_THRESHOLD){
524                    /* now set the value in the trie */
525                    if(!utrie_set32(sprepTrie,start,trieWord)){
526                        fprintf(stderr,"Could not set the value for code point.\n");
527                        exit(U_ILLEGAL_ARGUMENT_ERROR);
528                    }
529                    /* value is set so just return */
530                    return;
531                }else{
532                    fprintf(stderr,"trieWord cannot contain value greater than threshold 0x%04X.\n",_SPREP_TYPE_THRESHOLD);
533                    exit(U_ILLEGAL_CHAR_FOUND);
534                }
535
536            }else if(savedTrieWord != trieWord){
537                fprintf(stderr,"Value for codepoint \\U%08X already set!.\n", (int)start);
538                exit(U_ILLEGAL_ARGUMENT_ERROR);
539            }
540            /* if savedTrieWord == trieWord .. fall through and set the value */
541        }
542        if(!utrie_set32(sprepTrie,start,trieWord)){
543            fprintf(stderr,"Could not set the value for code point \\U%08X.\n", (int)start);
544            exit(U_ILLEGAL_ARGUMENT_ERROR);
545        }
546    }else{
547        if(!utrie_setRange32(sprepTrie, start, end+1, trieWord, FALSE)){
548            fprintf(stderr,"Value for certain codepoint already set.\n");
549            exit(U_ILLEGAL_CHAR_FOUND);
550        }
551    }
552
553}
554
555/* folding value: just store the offset (16 bits) if there is any non-0 entry */
556static uint32_t U_CALLCONV
557getFoldedValue(UNewTrie *trie, UChar32 start, int32_t offset) {
558    uint32_t foldedValue, value;
559    UChar32 limit=0;
560    UBool inBlockZero;
561
562    foldedValue=0;
563
564    limit=start+0x400;
565    while(start<limit) {
566        value=utrie_get32(trie, start, &inBlockZero);
567        if(inBlockZero) {
568            start+=UTRIE_DATA_BLOCK_LENGTH;
569        } else if(value!=0) {
570            return (uint32_t)offset;
571        } else {
572            ++start;
573        }
574    }
575    return 0;
576
577}
578
579#endif /* #if !UCONFIG_NO_IDNA */
580
581extern void
582generateData(const char *dataDir, const char* bundleName) {
583    static uint8_t sprepTrieBlock[100000];
584
585    UNewDataMemory *pData;
586    UErrorCode errorCode=U_ZERO_ERROR;
587    int32_t size, dataLength;
588    char* fileName = (char*) uprv_malloc(uprv_strlen(bundleName) +100);
589
590#if UCONFIG_NO_IDNA
591
592    size=0;
593
594#else
595
596    int32_t sprepTrieSize;
597
598    /* sort and add mapping data */
599    storeMappingData();
600
601    sprepTrieSize=utrie_serialize(sprepTrie, sprepTrieBlock, sizeof(sprepTrieBlock), getFoldedValue, TRUE, &errorCode);
602    if(U_FAILURE(errorCode)) {
603        fprintf(stderr, "error: utrie_serialize(sprep trie) failed, %s\n", u_errorName(errorCode));
604        exit(errorCode);
605    }
606
607    size = sprepTrieSize + mappingDataCapacity*U_SIZEOF_UCHAR + sizeof(indexes);
608    if(beVerbose) {
609        printf("size of sprep trie              %5u bytes\n", (int)sprepTrieSize);
610        printf("size of " U_ICUDATA_NAME "_%s." DATA_TYPE " contents: %ld bytes\n", bundleName,(long)size);
611        printf("size of mapping data array %5u bytes\n",(int)mappingDataCapacity * U_SIZEOF_UCHAR);
612        printf("Number of code units in mappingData (currentIndex) are: %i \n", currentIndex);
613        printf("Maximum length of the mapping string is : %i \n", (int)maxLength);
614    }
615
616#endif
617
618    fileName[0]=0;
619    uprv_strcat(fileName,bundleName);
620    /* write the data */
621    pData=udata_create(dataDir, DATA_TYPE, fileName, &dataInfo,
622                       haveCopyright ? U_COPYRIGHT_STRING : NULL, &errorCode);
623    if(U_FAILURE(errorCode)) {
624        fprintf(stderr, "gensprep: unable to create the output file, error %d\n", errorCode);
625        exit(errorCode);
626    }
627
628#if !UCONFIG_NO_IDNA
629
630    indexes[_SPREP_INDEX_TRIE_SIZE]=sprepTrieSize;
631    indexes[_SPREP_INDEX_MAPPING_DATA_SIZE]=mappingDataCapacity*U_SIZEOF_UCHAR;
632
633    udata_writeBlock(pData, indexes, sizeof(indexes));
634    udata_writeBlock(pData, sprepTrieBlock, sprepTrieSize);
635    udata_writeBlock(pData, mappingData, indexes[_SPREP_INDEX_MAPPING_DATA_SIZE]);
636
637
638#endif
639
640    /* finish up */
641    dataLength=udata_finish(pData, &errorCode);
642    if(U_FAILURE(errorCode)) {
643        fprintf(stderr, "gensprep: error %d writing the output file\n", errorCode);
644        exit(errorCode);
645    }
646
647    if(dataLength!=size) {
648        fprintf(stderr, "gensprep error: data length %ld != calculated size %ld\n",
649            (long)dataLength, (long)size);
650        exit(U_INTERNAL_PROGRAM_ERROR);
651    }
652
653#if !UCONFIG_NO_IDNA
654    /* done with writing the data .. close the hashtable */
655    if (hashTable != NULL) {
656        uhash_close(hashTable);
657    }
658#endif
659}
660
661#if !UCONFIG_NO_IDNA
662
663extern void
664cleanUpData(void) {
665
666    utrie_close(sprepTrie);
667    uprv_free(sprepTrie);
668}
669
670#endif /* #if !UCONFIG_NO_IDNA */
671
672/*
673 * Hey, Emacs, please set the following:
674 *
675 * Local Variables:
676 * indent-tabs-mode: nil
677 * End:
678 *
679 */
680