16f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org/*
26f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org*******************************************************************************
36f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org*
46f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org*   Copyright (C) 1999-2012, International Business Machines
56f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org*   Corporation and others.  All Rights Reserved.
66f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org*
76f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org*******************************************************************************
86f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org*   file name:  store.c
96f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org*   encoding:   US-ASCII
106f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org*   tab size:   8 (not used)
116f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org*   indentation:4
126f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org*
136f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org*   created on: 2003-02-06
146f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org*   created by: Ram Viswanadha
156f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org*
166f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org*/
176f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org
186f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org#include <stdio.h>
196f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org#include <stdlib.h>
206f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org#include "unicode/utypes.h"
216f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org#include "cmemory.h"
226f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org#include "cstring.h"
236f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org#include "filestrm.h"
246f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org#include "unicode/udata.h"
256f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org#include "unicode/utf16.h"
266f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org#include "utrie.h"
276f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org#include "unewdata.h"
286f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org#include "gensprep.h"
296f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org#include "uhash.h"
306f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org
316f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org
326f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org#define DO_DEBUG_OUT 0
336f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org
346f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org
356f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org/*
366f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org * StringPrep profile file format ------------------------------------
376f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org *
386f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org * The file format prepared and written here contains a 16-bit trie and a mapping table.
396f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org *
406f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org * Before the data contents described below, there are the headers required by
416f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org * the udata API for loading ICU data. Especially, a UDataInfo structure
426f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org * precedes the actual data. It contains platform properties values and the
436f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org * file format version.
446f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org *
456f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org * The following is a description of format version 2.
466f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org *
476f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org * Data contents:
486f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org *
 * The content is a parsed, binary form of RFC 3454 and possibly
 * NormalizationCorrections.txt, depending on the options specified in the profile.
516f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org *
526f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org * Any Unicode code point from 0 to 0x10ffff can be looked up to get
536f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org * the trie-word, if any, for that code point. This means that the input
546f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org * to the lookup are 21-bit unsigned integers, with not all of the
556f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org * 21-bit range used.
566f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org *
576f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org * *.spp files customarily begin with a UDataInfo structure, see udata.h and .c.
586f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org * After that there are the following structures:
596f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org *
606f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org * int32_t indexes[_SPREP_INDEX_TOP];           -- _SPREP_INDEX_TOP=16, see enum in sprpimpl.h file
616f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org *
626f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org * UTrie stringPrepTrie;                        -- size in bytes=indexes[_SPREP_INDEX_TRIE_SIZE]
636f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org *
 * uint16_t mappingTable[];                     -- Contains the sequence of code units that the code point maps to
656f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org *                                                 size in bytes = indexes[_SPREP_INDEX_MAPPING_DATA_SIZE]
666f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org *
676f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org * The indexes array contains the following values:
686f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org *  indexes[_SPREP_INDEX_TRIE_SIZE]                  -- The size of the StringPrep trie in bytes
696f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org *  indexes[_SPREP_INDEX_MAPPING_DATA_SIZE]          -- The size of the mappingTable in bytes
706f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org *  indexes[_SPREP_NORM_CORRECTNS_LAST_UNI_VERSION]  -- The index of Unicode version of last entry in NormalizationCorrections.txt
716f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org *  indexes[_SPREP_ONE_UCHAR_MAPPING_INDEX_START]    -- The starting index of 1 UChar  mapping index in the mapping table
726f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org *  indexes[_SPREP_TWO_UCHARS_MAPPING_INDEX_START]   -- The starting index of 2 UChars mapping index in the mapping table
736f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org *  indexes[_SPREP_THREE_UCHARS_MAPPING_INDEX_START] -- The starting index of 3 UChars mapping index in the mapping table
746f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org *  indexes[_SPREP_FOUR_UCHARS_MAPPING_INDEX_START]  -- The starting index of 4 UChars mapping index in the mapping table
756f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org *  indexes[_SPREP_OPTIONS]                          -- Bit set of options to turn on in the profile, e.g: USPREP_NORMALIZATION_ON, USPREP_CHECK_BIDI_ON
766f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org *
776f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org *
786f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org * StringPrep Trie :
796f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org *
 * The StringPrep trie is a 16-bit trie that contains data for the profile.
816f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org * Each code point is associated with a value (trie-word) in the trie.
826f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org *
836f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org * - structure of data words from the trie
846f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org *
856f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org *  i)  A value greater than or equal to _SPREP_TYPE_THRESHOLD (0xFFF0)
866f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org *      represents the type associated with the code point
876f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org *      if(trieWord >= _SPREP_TYPE_THRESHOLD){
886f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org *          type = trieWord - 0xFFF0;
896f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org *      }
906f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org *      The type can be :
916f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org *             USPREP_UNASSIGNED
926f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org *             USPREP_PROHIBITED
936f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org *             USPREP_DELETE
946f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org *
956f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org *  ii) A value less than _SPREP_TYPE_THRESHOLD means the type is USPREP_MAP and
966f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org *      contains distribution described below
976f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org *
986f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org *      0       -  ON : The code point is prohibited (USPREP_PROHIBITED). This is to allow for codepoint that are both prohibited and mapped.
996f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org *      1       -  ON : The value in the next 14 bits is an index into the mapping table
 *                 OFF: The value in the next 14 bits is a delta value from the code point
1016f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org *      2..15   -  Contains data as described by bit 1. If all bits are set
1026f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org *                 (value = _SPREP_MAX_INDEX_VALUE) then the type is USPREP_DELETE
1036f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org *
1046f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org *
1056f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org * Mapping Table:
1066f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org * The data in mapping table is sorted according to the length of the mapping sequence.
 * If the type of the code point is USPREP_MAP and the value in the trie word is an index, the index
 * is compared with the start indexes of the sequence-length sections to determine the length,
 * according to the following algorithm:
1106f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org *
1116f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org *              if(       index >= indexes[_SPREP_ONE_UCHAR_MAPPING_INDEX_START] &&
1126f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org *                        index < indexes[_SPREP_TWO_UCHARS_MAPPING_INDEX_START]){
1136f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org *                   length = 1;
1146f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org *               }else if(index >= indexes[_SPREP_TWO_UCHARS_MAPPING_INDEX_START] &&
1156f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org *                        index < indexes[_SPREP_THREE_UCHARS_MAPPING_INDEX_START]){
1166f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org *                   length = 2;
1176f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org *               }else if(index >= indexes[_SPREP_THREE_UCHARS_MAPPING_INDEX_START] &&
1186f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org *                        index < indexes[_SPREP_FOUR_UCHARS_MAPPING_INDEX_START]){
1196f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org *                   length = 3;
1206f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org *               }else{
1216f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org *                   // The first position in the mapping table contains the length
1226f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org *                   // of the sequence
1236f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org *                   length = mappingTable[index++];
1246f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org *
1256f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org *               }
1266f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org *
1276f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org */
1286f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org
1296f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org/* file data ---------------------------------------------------------------- */
1306f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org/* indexes[] value names */
1316f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org
1326f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org#if UCONFIG_NO_IDNA
1336f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org
1346f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org/* dummy UDataInfo cf. udata.h */
1356f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.orgstatic UDataInfo dataInfo = {
1366f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    sizeof(UDataInfo),
1376f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    0,
1386f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org
1396f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    U_IS_BIG_ENDIAN,
1406f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    U_CHARSET_FAMILY,
1416f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    U_SIZEOF_UCHAR,
1426f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    0,
1436f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org
1446f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    { 0, 0, 0, 0 },                 /* dummy dataFormat */
1456f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    { 0, 0, 0, 0 },                 /* dummy formatVersion */
1466f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    { 0, 0, 0, 0 }                  /* dummy dataVersion */
1476f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org};
1486f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org
1496f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org#else
1506f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org
1516f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.orgstatic int32_t indexes[_SPREP_INDEX_TOP]={ 0 };
1526f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org
1536f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.orgstatic uint16_t* mappingData= NULL;
1546f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.orgstatic int32_t mappingDataCapacity = 0; /* we skip the first index in mapping data */
1556f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.orgstatic int16_t currentIndex = 0; /* the current index into the data trie */
1566f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.orgstatic int32_t maxLength = 0;  /* maximum length of mapping string */
1576f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org
1586f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org
1596f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org/* UDataInfo cf. udata.h */
1606f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.orgstatic UDataInfo dataInfo={
1616f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    sizeof(UDataInfo),
1626f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    0,
1636f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org
1646f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    U_IS_BIG_ENDIAN,
1656f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    U_CHARSET_FAMILY,
1666f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    U_SIZEOF_UCHAR,
1676f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    0,
1686f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org
1696f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    { 0x53, 0x50, 0x52, 0x50 },                 /* dataFormat="SPRP" */
1706f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    { 3, 2, UTRIE_SHIFT, UTRIE_INDEX_SHIFT },   /* formatVersion */
1716f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    { 3, 2, 0, 0 }                              /* dataVersion (Unicode version) */
1726f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org};
1736f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.orgvoid
1746f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.orgsetUnicodeVersion(const char *v) {
1756f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    UVersionInfo version;
1766f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    u_versionFromString(version, v);
1776f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    uprv_memcpy(dataInfo.dataVersion, version, 4);
1786f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org}
1796f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org
1806f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.orgvoid
1816f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.orgsetUnicodeVersionNC(UVersionInfo version){
1826f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    uint32_t univer = version[0] << 24;
1836f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    univer += version[1] << 16;
1846f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    univer += version[2] << 8;
1856f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    univer += version[3];
1866f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    indexes[_SPREP_NORM_CORRECTNS_LAST_UNI_VERSION] = univer;
1876f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org}
1886f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.orgstatic UNewTrie *sprepTrie;
1896f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org
/* Maximum data length passed to utrie_open() by init(). */
#define MAX_DATA_LENGTH 11500


/*
 * Inclusive limits of a signed 14-bit value, the width of the delta field
 * in a trie word. The negative limit is parenthesized so that the macro
 * always expands to a single operand, whatever expression it is used in.
 */
#define SPREP_DELTA_RANGE_POSITIVE_LIMIT              8191
#define SPREP_DELTA_RANGE_NEGATIVE_LIMIT              (-8192)
1956f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org
1966f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org
1976f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.orgextern void
1986f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.orginit() {
1996f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org
2006f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    sprepTrie = (UNewTrie *)uprv_calloc(1, sizeof(UNewTrie));
2016f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org
2026f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    /* initialize the two tries */
2036f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    if(NULL==utrie_open(sprepTrie, NULL, MAX_DATA_LENGTH, 0, 0, FALSE)) {
2046f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        fprintf(stderr, "error: failed to initialize tries\n");
2056f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        exit(U_MEMORY_ALLOCATION_ERROR);
2066f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    }
2076f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org}
2086f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org
2096f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.orgstatic UHashtable* hashTable = NULL;
2106f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org
2116f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org
2126f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.orgtypedef struct ValueStruct {
2136f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    UChar* mapping;
2146f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    int16_t length;
2156f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    UStringPrepType type;
2166f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org} ValueStruct;
2176f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org
2186f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org/* Callback for deleting the value from the hashtable */
2196f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.orgstatic void U_CALLCONV valueDeleter(void* obj){
2206f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    ValueStruct* value = (ValueStruct*) obj;
2216f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    uprv_free(value->mapping);
2226f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    uprv_free(value);
2236f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org}
2246f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org
2256f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org/* Callback for hashing the entry */
2266f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.orgstatic int32_t U_CALLCONV hashEntry(const UHashTok parm) {
2276f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    return  parm.integer;
2286f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org}
2296f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org
2306f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org/* Callback for comparing two entries */
2316f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.orgstatic UBool U_CALLCONV compareEntries(const UHashTok p1, const UHashTok p2) {
2326f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    return (UBool)(p1.integer != p2.integer);
2336f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org}
2346f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org
2356f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org
2366f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.orgstatic void
2376f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.orgstoreMappingData(){
2386f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org
2396f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    int32_t pos = -1;
2406f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    const UHashElement* element = NULL;
2416f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    ValueStruct* value  = NULL;
2426f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    int32_t codepoint = 0;
2436f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    int32_t elementCount = 0;
2446f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    int32_t writtenElementCount = 0;
2456f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    int32_t mappingLength = 1; /* minimum mapping length */
2466f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    int32_t oldMappingLength = 0;
2476f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    uint16_t trieWord =0;
2486f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    int32_t limitIndex = 0;
2496f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org
2506f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    if (hashTable == NULL) {
2516f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        return;
2526f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    }
2536f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    elementCount = uhash_count(hashTable);
2546f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org
2556f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org	/*initialize the mapping data */
2566f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    mappingData = (uint16_t*) uprv_calloc(mappingDataCapacity, U_SIZEOF_UCHAR);
2576f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org
2586f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    while(writtenElementCount < elementCount){
2596f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org
2606f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        while( (element = uhash_nextElement(hashTable, &pos))!=NULL){
2616f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org
2626f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org            codepoint = element->key.integer;
2636f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org            value = (ValueStruct*)element->value.pointer;
2646f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org
2656f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org            /* store the start of indexes */
2666f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org            if(oldMappingLength != mappingLength){
2676f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org                /* Assume that index[] is used according to the enums defined */
2686f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org                if(oldMappingLength <=_SPREP_MAX_INDEX_TOP_LENGTH){
2696f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org                    indexes[_SPREP_NORM_CORRECTNS_LAST_UNI_VERSION+mappingLength] = currentIndex;
2706f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org                }
2716f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org                if(oldMappingLength <= _SPREP_MAX_INDEX_TOP_LENGTH &&
2726f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org                   mappingLength == _SPREP_MAX_INDEX_TOP_LENGTH +1){
2736f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org
2746f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org                    limitIndex = currentIndex;
2756f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org
2766f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org                }
2776f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org                oldMappingLength = mappingLength;
2786f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org            }
2796f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org
2806f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org            if(value->length == mappingLength){
2816f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org                uint32_t savedTrieWord = 0;
2826f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org                trieWord = currentIndex << 2;
2836f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org                /* turn on the 2nd bit to signal that the following bits contain an index */
2846f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org                trieWord += 0x02;
2856f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org
2866f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org                if(trieWord > _SPREP_TYPE_THRESHOLD){
2876f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org                    fprintf(stderr,"trieWord cannot contain value greater than 0x%04X.\n",_SPREP_TYPE_THRESHOLD);
2886f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org                    exit(U_ILLEGAL_CHAR_FOUND);
2896f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org                }
2906f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org                /* figure out if the code point has type already stored */
2916f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org                savedTrieWord= utrie_get32(sprepTrie,codepoint,NULL);
2926f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org                if(savedTrieWord!=0){
2936f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org                    if((savedTrieWord- _SPREP_TYPE_THRESHOLD) == USPREP_PROHIBITED){
2946f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org                        /* turn on the first bit in trie word */
2956f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org                        trieWord += 0x01;
2966f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org                    }else{
2976f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org                        /*
2986f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org                         * the codepoint has value something other than prohibited
2996f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org                         * and a mapping .. error!
3006f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org                         */
3016f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org                        fprintf(stderr,"Type for codepoint \\U%08X already set!.\n", (int)codepoint);
3026f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org                        exit(U_ILLEGAL_ARGUMENT_ERROR);
3036f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org                    }
3046f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org                }
3056f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org
3066f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org                /* now set the value in the trie */
3076f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org                if(!utrie_set32(sprepTrie,codepoint,trieWord)){
3086f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org                    fprintf(stderr,"Could not set the value for code point.\n");
3096f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org                    exit(U_ILLEGAL_ARGUMENT_ERROR);
3106f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org                }
3116f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org
3126f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org                /* written the trie word for the codepoint... increment the count*/
3136f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org                writtenElementCount++;
3146f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org
3156f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org                /* sanity check are we exceeding the max number allowed */
3166f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org                if(currentIndex+value->length+1 > _SPREP_MAX_INDEX_VALUE){
3176f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org                    fprintf(stderr, "Too many entries in the mapping table %i. Maximum allowed is %i\n",
3186f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org                        currentIndex+value->length, _SPREP_MAX_INDEX_VALUE);
3196f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org                    exit(U_INDEX_OUTOFBOUNDS_ERROR);
3206f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org                }
3216f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org
3226f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org                /* copy the mapping data */
3236f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org                /* write the length */
3246f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org                if(mappingLength > _SPREP_MAX_INDEX_TOP_LENGTH ){
3256f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org                     /* the cast here is safe since we donot expect the length to be > 65535 */
3266f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org                     mappingData[currentIndex++] = (uint16_t) mappingLength;
3276f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org                }
3286f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org                /* copy the contents to mappindData array */
3296f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org                uprv_memmove(mappingData+currentIndex, value->mapping, value->length*U_SIZEOF_UCHAR);
3306f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org                currentIndex += value->length;
3316f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org                if (currentIndex > mappingDataCapacity) {
3326f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org                    /* If this happens there is a bug in the computation of the mapping data size in storeMapping() */
3336f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org                    fprintf(stderr, "gensprep, fatal error at %s, %d.  Aborting.\n", __FILE__, __LINE__);
3346f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org                    exit(U_INTERNAL_PROGRAM_ERROR);
3356f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org                }
3366f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org            }
3376f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        }
3386f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        mappingLength++;
3396f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        pos = -1;
3406f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    }
3416f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    /* set the last length for range check */
3426f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    if(mappingLength <= _SPREP_MAX_INDEX_TOP_LENGTH){
3436f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        indexes[_SPREP_NORM_CORRECTNS_LAST_UNI_VERSION+mappingLength] = currentIndex+1;
3446f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    }else{
3456f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        indexes[_SPREP_FOUR_UCHARS_MAPPING_INDEX_START] = limitIndex;
3466f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    }
3476f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org
3486f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org}
3496f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org
3506f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.orgextern void setOptions(int32_t options){
3516f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    indexes[_SPREP_OPTIONS] = options;
3526f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org}
3536f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.orgextern void
3546f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.orgstoreMapping(uint32_t codepoint, uint32_t* mapping,int32_t length,
3556f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org             UStringPrepType type, UErrorCode* status){
3566f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org
3576f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org
3586f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    UChar* map = NULL;
3596f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    int16_t adjustedLen=0, i, j;
3606f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    uint16_t trieWord = 0;
3616f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    ValueStruct *value = NULL;
3626f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    uint32_t savedTrieWord = 0;
3636f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org
3646f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    /* initialize the hashtable */
3656f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    if(hashTable==NULL){
3666f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        hashTable = uhash_open(hashEntry, compareEntries, NULL, status);
3676f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        uhash_setValueDeleter(hashTable, valueDeleter);
3686f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    }
3696f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org
3706f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    /* figure out if the code point has type already stored */
3716f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    savedTrieWord= utrie_get32(sprepTrie,codepoint,NULL);
3726f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    if(savedTrieWord!=0){
3736f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        if((savedTrieWord- _SPREP_TYPE_THRESHOLD) == USPREP_PROHIBITED){
3746f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org            /* turn on the first bit in trie word */
3756f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org            trieWord += 0x01;
3766f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        }else{
3776f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org            /*
3786f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org             * the codepoint has value something other than prohibited
3796f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org             * and a mapping .. error!
3806f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org             */
3816f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org            fprintf(stderr,"Type for codepoint \\U%08X already set!.\n", (int)codepoint);
3826f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org            exit(U_ILLEGAL_ARGUMENT_ERROR);
3836f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        }
3846f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    }
3856f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org
3866f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    /* figure out the real length */
3876f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    for(i=0; i<length; i++){
3886f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        adjustedLen += U16_LENGTH(mapping[i]);
3896f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    }
3906f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org
3916f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    if(adjustedLen == 0){
3926f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        trieWord = (uint16_t)(_SPREP_MAX_INDEX_VALUE << 2);
3936f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        /* make sure that the value of trieWord is less than the threshold */
3946f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        if(trieWord < _SPREP_TYPE_THRESHOLD){
3956f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org            /* now set the value in the trie */
3966f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org            if(!utrie_set32(sprepTrie,codepoint,trieWord)){
3976f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org                fprintf(stderr,"Could not set the value for code point.\n");
3986f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org                exit(U_ILLEGAL_ARGUMENT_ERROR);
3996f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org            }
4006f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org            /* value is set so just return */
4016f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org            return;
4026f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        }else{
4036f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org            fprintf(stderr,"trieWord cannot contain value greater than threshold 0x%04X.\n",_SPREP_TYPE_THRESHOLD);
4046f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org            exit(U_ILLEGAL_CHAR_FOUND);
4056f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        }
4066f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    }
4076f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org
4086f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    if(adjustedLen == 1){
4096f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        /* calculate the delta */
4106f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        int16_t delta = (int16_t)((int32_t)codepoint - (int16_t) mapping[0]);
4116f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        if(delta >= SPREP_DELTA_RANGE_NEGATIVE_LIMIT && delta <= SPREP_DELTA_RANGE_POSITIVE_LIMIT){
4126f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org
4136f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org            trieWord = delta << 2;
4146f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org
4156f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org
4166f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org            /* make sure that the second bit is OFF */
4176f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org            if((trieWord & 0x02) != 0 ){
4186f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org                fprintf(stderr,"The second bit in the trie word is not zero while storing a delta.\n");
4196f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org                exit(U_INTERNAL_PROGRAM_ERROR);
4206f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org            }
4216f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org            /* make sure that the value of trieWord is less than the threshold */
4226f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org            if(trieWord < _SPREP_TYPE_THRESHOLD){
4236f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org                /* now set the value in the trie */
4246f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org                if(!utrie_set32(sprepTrie,codepoint,trieWord)){
4256f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org                    fprintf(stderr,"Could not set the value for code point.\n");
4266f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org                    exit(U_ILLEGAL_ARGUMENT_ERROR);
4276f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org                }
4286f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org                /* value is set so just return */
4296f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org                return;
4306f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org            }
4316f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        }
4326f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        /*
4336f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org         * if the delta is not in the given range or if the trieWord is larger than the threshold
4346f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org         * just fall through for storing the mapping in the mapping table
4356f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org         */
4366f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    }
4376f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org
4386f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    map = (UChar*) uprv_calloc(adjustedLen + 1, U_SIZEOF_UCHAR);
4396f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org
4406f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    for (i=0, j=0; i<length; i++) {
4416f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        U16_APPEND_UNSAFE(map, j, mapping[i]);
4426f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    }
4436f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org
4446f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    value = (ValueStruct*) uprv_malloc(sizeof(ValueStruct));
4456f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    value->mapping = map;
4466f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    value->type    = type;
4476f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    value->length  = adjustedLen;
4486f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    if(value->length > _SPREP_MAX_INDEX_TOP_LENGTH){
4496f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        mappingDataCapacity++;
4506f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    }
4516f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    if(maxLength < value->length){
4526f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        maxLength = value->length;
4536f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    }
4546f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    uhash_iput(hashTable,codepoint,value,status);
4556f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    mappingDataCapacity += adjustedLen;
4566f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org
4576f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    if(U_FAILURE(*status)){
4586f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        fprintf(stderr, "Failed to put entries into the hastable. Error: %s\n", u_errorName(*status));
4596f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        exit(*status);
4606f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    }
4616f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org}
4626f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org
4636f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org
4646f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.orgextern void
4656f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.orgstoreRange(uint32_t start, uint32_t end, UStringPrepType type,UErrorCode* status){
4666f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    uint16_t trieWord = 0;
4676f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org
4686f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    if((int)(_SPREP_TYPE_THRESHOLD + type) > 0xFFFF){
4696f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        fprintf(stderr,"trieWord cannot contain value greater than 0xFFFF.\n");
4706f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        exit(U_ILLEGAL_CHAR_FOUND);
4716f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    }
4726f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    trieWord = (_SPREP_TYPE_THRESHOLD + type); /* the top 4 bits contain the value */
4736f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    if(start == end){
4746f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        uint32_t savedTrieWord = utrie_get32(sprepTrie, start, NULL);
4756f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        if(savedTrieWord>0){
4766f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org            if(savedTrieWord < _SPREP_TYPE_THRESHOLD && type == USPREP_PROHIBITED){
4776f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org                /*
4786f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org                 * A mapping is stored in the trie word
4796f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org                 * and the only other possible type that a
4806f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org                 * code point can have is USPREP_PROHIBITED
4816f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org                 *
4826f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org                 */
4836f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org
4846f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org                /* turn on the 0th bit in the savedTrieWord */
4856f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org                savedTrieWord += 0x01;
4866f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org
4876f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org                /* the downcast is safe since we only save 16 bit values */
4886f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org                trieWord = (uint16_t)savedTrieWord;
4896f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org
4906f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org                /* make sure that the value of trieWord is less than the threshold */
4916f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org                if(trieWord < _SPREP_TYPE_THRESHOLD){
4926f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org                    /* now set the value in the trie */
4936f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org                    if(!utrie_set32(sprepTrie,start,trieWord)){
4946f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org                        fprintf(stderr,"Could not set the value for code point.\n");
4956f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org                        exit(U_ILLEGAL_ARGUMENT_ERROR);
4966f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org                    }
4976f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org                    /* value is set so just return */
4986f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org                    return;
4996f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org                }else{
5006f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org                    fprintf(stderr,"trieWord cannot contain value greater than threshold 0x%04X.\n",_SPREP_TYPE_THRESHOLD);
5016f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org                    exit(U_ILLEGAL_CHAR_FOUND);
5026f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org                }
5036f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org
5046f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org            }else if(savedTrieWord != trieWord){
5056f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org                fprintf(stderr,"Value for codepoint \\U%08X already set!.\n", (int)start);
5066f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org                exit(U_ILLEGAL_ARGUMENT_ERROR);
5076f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org            }
5086f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org            /* if savedTrieWord == trieWord .. fall through and set the value */
5096f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        }
5106f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        if(!utrie_set32(sprepTrie,start,trieWord)){
5116f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org            fprintf(stderr,"Could not set the value for code point \\U%08X.\n", (int)start);
5126f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org            exit(U_ILLEGAL_ARGUMENT_ERROR);
5136f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        }
5146f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    }else{
5156f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        if(!utrie_setRange32(sprepTrie, start, end+1, trieWord, FALSE)){
5166f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org            fprintf(stderr,"Value for certain codepoint already set.\n");
5176f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org            exit(U_ILLEGAL_CHAR_FOUND);
5186f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        }
5196f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    }
5206f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org
5216f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org}
5226f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org
5236f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org/* folding value: just store the offset (16 bits) if there is any non-0 entry */
5246f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.orgstatic uint32_t U_CALLCONV
5256f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.orggetFoldedValue(UNewTrie *trie, UChar32 start, int32_t offset) {
5266f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    uint32_t value;
5276f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    UChar32 limit=0;
5286f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    UBool inBlockZero;
5296f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org
5306f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    limit=start+0x400;
5316f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    while(start<limit) {
5326f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        value=utrie_get32(trie, start, &inBlockZero);
5336f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        if(inBlockZero) {
5346f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org            start+=UTRIE_DATA_BLOCK_LENGTH;
5356f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        } else if(value!=0) {
5366f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org            return (uint32_t)offset;
5376f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        } else {
5386f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org            ++start;
5396f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        }
5406f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    }
5416f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    return 0;
5426f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org
5436f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org}
5446f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org
5456f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org#endif /* #if !UCONFIG_NO_IDNA */
5466f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org
5476f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.orgextern void
5486f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.orggenerateData(const char *dataDir, const char* bundleName) {
5496f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    static uint8_t sprepTrieBlock[100000];
5506f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org
5516f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    UNewDataMemory *pData;
5526f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    UErrorCode errorCode=U_ZERO_ERROR;
5536f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    int32_t size, dataLength;
5546f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    char* fileName = (char*) uprv_malloc(uprv_strlen(bundleName) +100);
5556f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org
5566f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org#if UCONFIG_NO_IDNA
5576f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org
5586f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    size=0;
5596f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org
5606f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org#else
5616f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org
5626f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    int32_t sprepTrieSize;
5636f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org
5646f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    /* sort and add mapping data */
5656f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    storeMappingData();
5666f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org
5676f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    sprepTrieSize=utrie_serialize(sprepTrie, sprepTrieBlock, sizeof(sprepTrieBlock), getFoldedValue, TRUE, &errorCode);
5686f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    if(U_FAILURE(errorCode)) {
5696f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        fprintf(stderr, "error: utrie_serialize(sprep trie) failed, %s\n", u_errorName(errorCode));
5706f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        exit(errorCode);
5716f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    }
5726f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org
5736f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    size = sprepTrieSize + mappingDataCapacity*U_SIZEOF_UCHAR + sizeof(indexes);
5746f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    if(beVerbose) {
5756f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        printf("size of sprep trie              %5u bytes\n", (int)sprepTrieSize);
5766f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        printf("size of " U_ICUDATA_NAME "_%s." DATA_TYPE " contents: %ld bytes\n", bundleName,(long)size);
5776f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        printf("size of mapping data array %5u bytes\n",(int)mappingDataCapacity * U_SIZEOF_UCHAR);
5786f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        printf("Number of code units in mappingData (currentIndex) are: %i \n", currentIndex);
5796f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        printf("Maximum length of the mapping string is : %i \n", (int)maxLength);
5806f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    }
5816f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org
5826f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org#endif
5836f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org
5846f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    fileName[0]=0;
5856f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    uprv_strcat(fileName,bundleName);
5866f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    /* write the data */
5876f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    pData=udata_create(dataDir, DATA_TYPE, fileName, &dataInfo,
5886f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org                       haveCopyright ? U_COPYRIGHT_STRING : NULL, &errorCode);
5896f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    if(U_FAILURE(errorCode)) {
5906f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        fprintf(stderr, "gensprep: unable to create the output file, error %d\n", errorCode);
5916f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        exit(errorCode);
5926f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    }
5936f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org
5946f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org#if !UCONFIG_NO_IDNA
5956f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org
5966f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    indexes[_SPREP_INDEX_TRIE_SIZE]=sprepTrieSize;
5976f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    indexes[_SPREP_INDEX_MAPPING_DATA_SIZE]=mappingDataCapacity*U_SIZEOF_UCHAR;
5986f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org
5996f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    udata_writeBlock(pData, indexes, sizeof(indexes));
6006f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    udata_writeBlock(pData, sprepTrieBlock, sprepTrieSize);
6016f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    udata_writeBlock(pData, mappingData, indexes[_SPREP_INDEX_MAPPING_DATA_SIZE]);
6026f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org
6036f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org
6046f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org#endif
6056f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org
6066f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    /* finish up */
6076f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    dataLength=udata_finish(pData, &errorCode);
6086f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    if(U_FAILURE(errorCode)) {
6096f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        fprintf(stderr, "gensprep: error %d writing the output file\n", errorCode);
6106f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        exit(errorCode);
6116f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    }
6126f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org
6136f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    if(dataLength!=size) {
6146f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        fprintf(stderr, "gensprep error: data length %ld != calculated size %ld\n",
6156f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org            (long)dataLength, (long)size);
6166f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        exit(U_INTERNAL_PROGRAM_ERROR);
6176f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    }
6186f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org
6196f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org#if !UCONFIG_NO_IDNA
6206f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    /* done with writing the data .. close the hashtable */
6216f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    if (hashTable != NULL) {
6226f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org        uhash_close(hashTable);
6236f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    }
6246f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org#endif
6256f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org
6266f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    uprv_free(fileName);
6276f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org}
6286f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org
6296f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org#if !UCONFIG_NO_IDNA
6306f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org
6316f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.orgextern void
6326f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.orgcleanUpData(void) {
6336f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    uprv_free(mappingData);
6346f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    utrie_close(sprepTrie);
6356f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org    uprv_free(sprepTrie);
6366f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org}
6376f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org
6386f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org#endif /* #if !UCONFIG_NO_IDNA */
6396f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org
6406f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org/*
6416f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org * Hey, Emacs, please set the following:
6426f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org *
6436f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org * Local Variables:
6446f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org * indent-tabs-mode: nil
6456f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org * End:
6466f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org *
6476f31ac30b9092fd02a8c97e5216cf53f3e4fae4jshin@chromium.org */
648