/*
*******************************************************************************
*
*   Copyright (C) 1999-2009, International Business Machines
*   Corporation and others.  All Rights Reserved.
*
*******************************************************************************
*   file name:  store.c
*   encoding:   US-ASCII
*   tab size:   8 (not used)
*   indentation:4
*
*   created on: 1999dec11
*   created by: Markus W. Scherer
*
*   Store Unicode character properties efficiently for
*   random access.
*/

#include <stdio.h>
#include <stdlib.h>
#include "unicode/utypes.h"
#include "unicode/uchar.h"
#include "cmemory.h"
#include "cstring.h"
#include "utrie.h"
#include "unicode/udata.h"
#include "unewdata.h"
#include "writesrc.h"
#include "uprops.h"
#include "genprops.h"

#define DO_DEBUG_OUT 0

/* Unicode character properties file format ------------------------------------

The file format prepared and written here contains several data
structures that store indexes or data.

Before the data contents described below, there are the headers required by
the udata API for loading ICU data.
Especially, a UDataInfo structure
precedes the actual data. It contains platform property values and the
file format version.

The following is a description of format version 6.

Data contents:

The contents are a parsed, binary form of several Unicode character
database files, most prominently UnicodeData.txt.

Any Unicode code point from 0 to 0x10ffff can be looked up to get
the properties, if any, for that code point. This means that the inputs
to the lookup are 21-bit unsigned integers, with not all of the
21-bit range used.

It is assumed that client code keeps a uint32_t pointer
to the beginning of the data:

    const uint32_t *p32;

Formally, the file contains the following structures:

    const int32_t indexes[16] with values i0..i15:

  i0 indicates the length of the main trie.
  i0..i3 all have the same value in format versions 4.0 and higher;
         the related props32[] and exceptions[] and uchars[] were used in format version 3

    i0 propsIndex; -- 32-bit unit index to the table of 32-bit properties words
    i1 exceptionsIndex; -- 32-bit unit index to the table of 32-bit exception words
    i2 exceptionsTopIndex; -- 32-bit unit index to the array of UChars for special mappings

    i3 additionalTrieIndex; -- 32-bit unit index to the additional trie for more properties
    i4 additionalVectorsIndex; -- 32-bit unit index to the table of properties vectors
    i5 additionalVectorsColumns; -- number of 32-bit words per properties vector

    i6 reservedItemIndex; -- 32-bit unit index to the top of the properties vectors table
    i7..i9 reservedIndexes; -- reserved values; 0 for now

    i10 maxValues; -- maximum code values for vector word 0, see uprops.h (new in format version 3.1+)
    i11 maxValues2; -- maximum code values for vector word 2, see uprops.h (new in format version 3.2)
    i12..i15 reservedIndexes; -- reserved values; 0 for now

    PT serialized properties trie, see utrie.h (byte size: 4*(i0-16))

  P, E, and U are not used (empty) in format versions 4 and above

    P  const uint32_t props32[i1-i0];
    E  const uint32_t exceptions[i2-i1];
    U  const UChar uchars[2*(i3-i2)];

    AT serialized trie for additional properties (byte size: 4*(i4-i3))
    PV const uint32_t propsVectors[(i6-i4)/i5][i5]==uint32_t propsVectors[i6-i4];

Trie lookup and properties:

In order to condense the data for the 21-bit code space, several properties of
the Unicode code assignment are exploited:
- The code space is sparse.
- There are several 10k of consecutive codes with the same properties.
- Characters and scripts are allocated in groups of 16 code points.
- Inside blocks for scripts the properties are often repetitive.
- The 21-bit space is not fully used for Unicode.

The lookup of properties for a given code point is done with a trie lookup,
using the UTrie implementation.
The trie lookup result is a 16-bit properties word.

With a given Unicode code point

    UChar32 c;

and 0<=c<0x110000, the lookup is done like this:

    uint16_t props;
    UTRIE_GET16(trie, c, props);

Each 16-bit properties word contains:

 0.. 4  general category
 5      reserved
 6..15  numeric type and value (ntv)

Encoding of numeric type and value in the 10-bit ntv field:
    ntv             type            value
    0               U_NT_NONE       0
    1..10           U_NT_DECIMAL    0..9
    11..20          U_NT_DIGIT      0..9
    21..0x2ff       U_NT_NUMERIC    see below
    0x300..0x3ff    reserved

    For U_NT_NUMERIC:
    ntv             value
    21..0xaf        integer     0..154
    0xb0..0x1df     fraction    ((ntv>>4)-12) / ((ntv&0xf)+1) = -1..17 / 1..16
    0x1e0..0x2ff    large int   ((ntv>>5)-14) * 10^((ntv&0x1f)+2) = (1..9)*(10^2..10^33)
                    (only one significant decimal digit)

--- Additional properties (new in format version 2.1) ---

The second trie for additional properties (AT) is also a UTrie with 16-bit data.
The data words consist of 32-bit unit indexes (not row indexes!) into the
table of unique properties vectors (PV).
Each vector contains a set of properties.
The width of a vector (number of uint32_t per row) may change
with the formatVersion; it is stored in i5.

Current properties: see icu/source/common/uprops.h

--- Changes in format version 3.1 ---

See i10 maxValues above, contains only UBLOCK_COUNT and USCRIPT_CODE_LIMIT.

--- Changes in format version 3.2 ---

- The tries use linear Latin-1 ranges.
- The additional properties bits store full properties XYZ instead
  of partial Other_XYZ, so that changes in the derivation formulas
  need not be tracked in runtime library code.
- Joining Type and Line Break are also stored completely, so that uprops.c
  needs no runtime formulas for enumerated properties either.
- Store the case-sensitive flag in the main properties word.
- i10 also contains U_LB_COUNT and U_EA_COUNT.
- i11 contains maxValues2 for vector word 2.

--- Changes in format version 4 ---

The format changes between version 3 and 4 because the properties related to
case mappings and bidi/shaping are pulled out into separate files
for modularization.
In order to reduce the need for code changes, some of the previous data
structures are omitted, rather than rearranging everything.

(The change to format version 4 is for ICU 3.4. The last CVS revision of
genprops/store.c for format version 3.2 is 1.48.)

The main trie's data is significantly simplified:
- The trie's 16-bit data word is used directly instead of as an index
  into props32[].
- The trie uses the default trie folding functions instead of custom ones.
- Numeric values are stored directly in the trie data word, with special
  encodings.
- No more exception data (the data that needed it was pulled out, or, in the
  case of numeric values, encoded differently).
- No more string data (pulled out - was for case mappings).

Also, some of the previously used properties vector bits are reserved again.

The indexes[] values for the omitted structures are still filled in
(indicating zero-length arrays) so that the swapper code remains unchanged.

--- Changes in format version 5 ---

Format version 5 became necessary because the bit field for script codes
overflowed. The changes are incompatible because
old code would have seen nonsensically low values for new, higher script codes.

Rearranged bit fields in the second trie (AT) and widened three (Script, Block,
Word_Break) by one bit each.

Modified bit fields in icu/source/common/uprops.h

--- Changes in format version 6 ---

Format version 6 became necessary because Unicode 5.2 adds fractions with
denominators 9, 10 and 16, and it was easier to redesign the encoding of numeric
types and values rather than add another variant to the previous format.
208ea1f1813c8b13a850b13f256aeb5152bb0942e81claireho 209ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru----------------------------------------------------------------------------- */ 210ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 211ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru/* UDataInfo cf. udata.h */ 212ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Querustatic UDataInfo dataInfo={ 213ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru sizeof(UDataInfo), 214ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 0, 215ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 216ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru U_IS_BIG_ENDIAN, 217ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru U_CHARSET_FAMILY, 218ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru U_SIZEOF_UCHAR, 219ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 0, 220ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 221ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru { 0x55, 0x50, 0x72, 0x6f }, /* dataFormat="UPro" */ 222ea1f1813c8b13a850b13f256aeb5152bb0942e81claireho { 6, 0, UTRIE_SHIFT, UTRIE_INDEX_SHIFT }, /* formatVersion */ 22385bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho { 5, 1, 0, 0 } /* dataVersion */ 224ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru}; 225ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 226ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Querustatic UNewTrie *pTrie=NULL; 227ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 228ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru/* -------------------------------------------------------------------------- */ 229ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 230ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queruextern void 231ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste QuerusetUnicodeVersion(const char *v) { 
232ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru UVersionInfo version; 233ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru u_versionFromString(version, v); 234ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru uprv_memcpy(dataInfo.dataVersion, version, 4); 235ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru} 236ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 237ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queruextern void 238ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste QueruinitStore() { 239ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru pTrie=utrie_open(NULL, NULL, 40000, 0, 0, TRUE); 240ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru if(pTrie==NULL) { 241ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru fprintf(stderr, "error: unable to create a UNewTrie\n"); 242ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru exit(U_MEMORY_ALLOCATION_ERROR); 243ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru } 244ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 245ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru initAdditionalProperties(); 246ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru} 247ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 248ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queruextern void 249ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste QueruexitStore() { 250ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru utrie_close(pTrie); 251ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru exitAdditionalProperties(); 252ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru} 253ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 254ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru/* store a character's properties ------------------------------------------- */ 255ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 
256ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queruextern uint32_t 257ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste QuerumakeProps(Props *p) { 258ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru uint32_t den; 259ea1f1813c8b13a850b13f256aeb5152bb0942e81claireho int32_t type, value, exp, ntv; 260ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 261ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru /* encode numeric type & value */ 262ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru type=p->numericType; 263ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru value=p->numericValue; 264ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru den=p->denominator; 265ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru exp=p->exponent; 266ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 267ea1f1813c8b13a850b13f256aeb5152bb0942e81claireho ntv=-1; /* the numeric type and value cannot be encoded if ntv remains -1 */ 268ea1f1813c8b13a850b13f256aeb5152bb0942e81claireho switch(type) { 269ea1f1813c8b13a850b13f256aeb5152bb0942e81claireho case U_NT_NONE: 270ea1f1813c8b13a850b13f256aeb5152bb0942e81claireho if(value==0 && den==0 && exp==0) { 271ea1f1813c8b13a850b13f256aeb5152bb0942e81claireho ntv=UPROPS_NTV_NONE; 272ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru } 273ea1f1813c8b13a850b13f256aeb5152bb0942e81claireho break; 274ea1f1813c8b13a850b13f256aeb5152bb0942e81claireho case U_NT_DECIMAL: 275ea1f1813c8b13a850b13f256aeb5152bb0942e81claireho if(0<=value && value<=9 && den==0 && exp==0) { 276ea1f1813c8b13a850b13f256aeb5152bb0942e81claireho ntv=UPROPS_NTV_DECIMAL_START+value; 277ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru } 278ea1f1813c8b13a850b13f256aeb5152bb0942e81claireho break; 279ea1f1813c8b13a850b13f256aeb5152bb0942e81claireho case U_NT_DIGIT: 280ea1f1813c8b13a850b13f256aeb5152bb0942e81claireho if(0<=value && value<=9 && den==0 && exp==0) { 
281ea1f1813c8b13a850b13f256aeb5152bb0942e81claireho ntv=UPROPS_NTV_DIGIT_START+value; 282ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru } 283ea1f1813c8b13a850b13f256aeb5152bb0942e81claireho break; 284ea1f1813c8b13a850b13f256aeb5152bb0942e81claireho case U_NT_NUMERIC: 285ea1f1813c8b13a850b13f256aeb5152bb0942e81claireho if(den==0) { 286ea1f1813c8b13a850b13f256aeb5152bb0942e81claireho if(exp==2 && (value*100)<=UPROPS_NTV_MAX_SMALL_INT) { 287ea1f1813c8b13a850b13f256aeb5152bb0942e81claireho /* small integer parsed like a large one */ 288ea1f1813c8b13a850b13f256aeb5152bb0942e81claireho ntv=UPROPS_NTV_NUMERIC_START+value*100; 289ea1f1813c8b13a850b13f256aeb5152bb0942e81claireho } else if(exp==0 && value>=0) { 290ea1f1813c8b13a850b13f256aeb5152bb0942e81claireho if(value<=UPROPS_NTV_MAX_SMALL_INT) { 291ea1f1813c8b13a850b13f256aeb5152bb0942e81claireho /* small integer */ 292ea1f1813c8b13a850b13f256aeb5152bb0942e81claireho ntv=UPROPS_NTV_NUMERIC_START+value; 293ea1f1813c8b13a850b13f256aeb5152bb0942e81claireho } else { 294ea1f1813c8b13a850b13f256aeb5152bb0942e81claireho /* large integer parsed like a small one */ 295ea1f1813c8b13a850b13f256aeb5152bb0942e81claireho /* split the value into mantissa and exponent, base 10 */ 296ea1f1813c8b13a850b13f256aeb5152bb0942e81claireho int32_t mant=value; 297ea1f1813c8b13a850b13f256aeb5152bb0942e81claireho while((mant%10)==0) { 298ea1f1813c8b13a850b13f256aeb5152bb0942e81claireho mant/=10; 299ea1f1813c8b13a850b13f256aeb5152bb0942e81claireho ++exp; 300ea1f1813c8b13a850b13f256aeb5152bb0942e81claireho } 301ea1f1813c8b13a850b13f256aeb5152bb0942e81claireho if(mant<=9) { 302ea1f1813c8b13a850b13f256aeb5152bb0942e81claireho ntv=((mant+14)<<5)+(exp-2); 303ea1f1813c8b13a850b13f256aeb5152bb0942e81claireho } 304ea1f1813c8b13a850b13f256aeb5152bb0942e81claireho } 305ea1f1813c8b13a850b13f256aeb5152bb0942e81claireho } else if(2<=exp && exp<=33 && 1<=value && value<=9) { 306ea1f1813c8b13a850b13f256aeb5152bb0942e81claireho /* large, 
single-significant-digit integer */ 307ea1f1813c8b13a850b13f256aeb5152bb0942e81claireho ntv=((value+14)<<5)+(exp-2); 308ea1f1813c8b13a850b13f256aeb5152bb0942e81claireho } 309ea1f1813c8b13a850b13f256aeb5152bb0942e81claireho } else if(exp==0) { 310ea1f1813c8b13a850b13f256aeb5152bb0942e81claireho if(-1<=value && value<=17 && 1<=den && den<=16) { 311ea1f1813c8b13a850b13f256aeb5152bb0942e81claireho /* fraction */ 312ea1f1813c8b13a850b13f256aeb5152bb0942e81claireho ntv=((value+12)<<4)+(den-1); 313ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru } 314ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru } 315ea1f1813c8b13a850b13f256aeb5152bb0942e81claireho default: 316ea1f1813c8b13a850b13f256aeb5152bb0942e81claireho break; 317ea1f1813c8b13a850b13f256aeb5152bb0942e81claireho } 318ea1f1813c8b13a850b13f256aeb5152bb0942e81claireho if(ntv<0) { 319ea1f1813c8b13a850b13f256aeb5152bb0942e81claireho fprintf(stderr, "genprops error: unable to encode numeric type %d & value %ld/%lu E%d\n", 320ea1f1813c8b13a850b13f256aeb5152bb0942e81claireho (int)type, (long)value, (unsigned long)den, exp); 321ea1f1813c8b13a850b13f256aeb5152bb0942e81claireho exit(U_ILLEGAL_ARGUMENT_ERROR); 322ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru } 323ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 324ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru /* encode the properties */ 325ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru return 326ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru (uint32_t)p->generalCategory | 327ea1f1813c8b13a850b13f256aeb5152bb0942e81claireho (ntv<<UPROPS_NUMERIC_TYPE_VALUE_SHIFT); 328ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru} 329ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 330ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queruextern void 331ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste QueruaddProps(uint32_t c, uint32_t x) { 
332ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru if(!utrie_set32(pTrie, (UChar32)c, x)) { 333ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru fprintf(stderr, "error: too many entries for the properties trie\n"); 334ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru exit(U_BUFFER_OVERFLOW_ERROR); 335ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru } 336ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru} 337ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 338ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queruextern uint32_t 339ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste QuerugetProps(uint32_t c) { 340ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru return utrie_get32(pTrie, (UChar32)c, NULL); 341ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru} 342ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 343ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru/* areas of same properties ------------------------------------------------- */ 344ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 345ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queruextern void 346ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste QuerurepeatProps(uint32_t first, uint32_t last, uint32_t x) { 347ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru if(!utrie_setRange32(pTrie, (UChar32)first, (UChar32)(last+1), x, FALSE)) { 348ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru fprintf(stderr, "error: too many entries for the properties trie\n"); 349ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru exit(U_BUFFER_OVERFLOW_ERROR); 350ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru } 351ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru} 352ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 353ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru/* generate output data ----------------------------------------------------- */ 
354ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 355ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queruextern void 356ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste QuerugenerateData(const char *dataDir, UBool csource) { 357ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru static int32_t indexes[UPROPS_INDEX_COUNT]={ 358ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 0, 0, 0, 0, 359ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 0, 0, 0, 0, 360ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 0, 0, 0, 0, 361ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 0, 0, 0, 0 362ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru }; 363ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru static uint8_t trieBlock[40000]; 364ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru static uint8_t additionalProps[120000]; 365ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 366ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru UNewDataMemory *pData; 367ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru UErrorCode errorCode=U_ZERO_ERROR; 368ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru uint32_t size = 0; 369ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru int32_t trieSize, additionalPropsSize, offset; 370ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru long dataLength; 371ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 372ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru trieSize=utrie_serialize(pTrie, trieBlock, sizeof(trieBlock), NULL, TRUE, &errorCode); 373ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru if(U_FAILURE(errorCode)) { 374ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru fprintf(stderr, "error: utrie_serialize failed: %s (length %ld)\n", u_errorName(errorCode), (long)trieSize); 375ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru exit(errorCode); 
376ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru } 377ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 378ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru offset=sizeof(indexes)/4; /* uint32_t offset to the properties trie */ 379ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 380ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru /* round up trie size to 4-alignment */ 381ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru trieSize=(trieSize+3)&~3; 382ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru offset+=trieSize>>2; 383ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru indexes[UPROPS_PROPS32_INDEX]= /* set indexes to the same offsets for empty */ 384ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru indexes[UPROPS_EXCEPTIONS_INDEX]= /* structures from the old format version 3 */ 385ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru indexes[UPROPS_EXCEPTIONS_TOP_INDEX]= /* so that less runtime code has to be changed */ 386ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru indexes[UPROPS_ADDITIONAL_TRIE_INDEX]=offset; 387ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 388ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru if(beVerbose) { 389ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru printf("trie size in bytes: %5u\n", (int)trieSize); 390ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru } 391ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 392ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru if(csource) { 393ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru /* write .c file for hardcoded data */ 394ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru UTrie trie={ NULL }; 39585bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho UTrie2 *trie2; 396ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru FILE *f; 397ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 
398ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru utrie_unserialize(&trie, trieBlock, trieSize, &errorCode); 399ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru if(U_FAILURE(errorCode)) { 400ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru fprintf( 401ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru stderr, 402ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru "genprops error: failed to utrie_unserialize(uprops.icu main trie) - %s\n", 403ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru u_errorName(errorCode)); 40485bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho exit(errorCode); 40585bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho } 40685bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho 40785bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho /* use UTrie2 */ 40885bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho trie2=utrie2_fromUTrie(&trie, 0, &errorCode); 40985bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho if(U_FAILURE(errorCode)) { 41085bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho fprintf( 41185bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho stderr, 41285bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho "genprops error: utrie2_fromUTrie() failed - %s\n", 41385bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho u_errorName(errorCode)); 41485bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho exit(errorCode); 41585bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho } 41685bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho { 41785bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho /* delete lead surrogate code unit values */ 41885bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho UChar lead; 41985bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho trie2=utrie2_cloneAsThawed(trie2, &errorCode); 42085bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho for(lead=0xd800; lead<0xdc00; ++lead) { 42185bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho utrie2_set32ForLeadSurrogateCodeUnit(trie2, lead, trie2->initialValue, &errorCode); 
42285bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho } 42385bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho utrie2_freeze(trie2, UTRIE2_16_VALUE_BITS, &errorCode); 42485bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho if(U_FAILURE(errorCode)) { 42585bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho fprintf( 42685bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho stderr, 42785bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho "genprops error: deleting lead surrogate code unit values failed - %s\n", 42885bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho u_errorName(errorCode)); 42985bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho exit(errorCode); 43085bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho } 431ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru } 432ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 433ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru f=usrc_create(dataDir, "uchar_props_data.c"); 434ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru if(f!=NULL) { 435ea1f1813c8b13a850b13f256aeb5152bb0942e81claireho /* unused 436ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru usrc_writeArray(f, 437ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru "static const UVersionInfo formatVersion={", 438ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru dataInfo.formatVersion, 8, 4, 439ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru "};\n\n"); 440ea1f1813c8b13a850b13f256aeb5152bb0942e81claireho */ 441ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru usrc_writeArray(f, 442ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru "static const UVersionInfo dataVersion={", 443ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru dataInfo.dataVersion, 8, 4, 444ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru "};\n\n"); 44585bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho usrc_writeUTrie2Arrays(f, 446ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru "static const uint16_t 
propsTrie_index[%ld]={\n", NULL, 44785bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho trie2, 448ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru "\n};\n\n"); 44985bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho usrc_writeUTrie2Struct(f, 45085bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho "static const UTrie2 propsTrie={\n", 45185bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho trie2, "propsTrie_index", NULL, 452ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru "};\n\n"); 453ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 454ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru additionalPropsSize=writeAdditionalData(f, additionalProps, sizeof(additionalProps), indexes); 455ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru size=4*offset+additionalPropsSize; /* total size of data */ 456ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 457ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru usrc_writeArray(f, 458ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru "static const int32_t indexes[UPROPS_INDEX_COUNT]={", 459ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru indexes, 32, UPROPS_INDEX_COUNT, 460ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru "};\n\n"); 461ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru fclose(f); 462ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru } 46385bf2e2fbc60a9f938064abc8127d61da7d19882Claire Ho utrie2_close(trie2); 464ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru } else { 465ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru /* write the data */ 466ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru pData=udata_create(dataDir, DATA_TYPE, DATA_NAME, &dataInfo, 467ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru haveCopyright ? 
U_COPYRIGHT_STRING : NULL, &errorCode); 468ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru if(U_FAILURE(errorCode)) { 469ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru fprintf(stderr, "genprops: unable to create data memory, %s\n", u_errorName(errorCode)); 470ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru exit(errorCode); 471ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru } 472ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 473ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru additionalPropsSize=writeAdditionalData(NULL, additionalProps, sizeof(additionalProps), indexes); 474ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru size=4*offset+additionalPropsSize; /* total size of data */ 475ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 476ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru udata_writeBlock(pData, indexes, sizeof(indexes)); 477ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru udata_writeBlock(pData, trieBlock, trieSize); 478ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru udata_writeBlock(pData, additionalProps, additionalPropsSize); 479ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 480ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru /* finish up */ 481ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru dataLength=udata_finish(pData, &errorCode); 482ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru if(U_FAILURE(errorCode)) { 483ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru fprintf(stderr, "genprops: error %d writing the output file\n", errorCode); 484ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru exit(errorCode); 485ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru } 486ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 487ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru if(dataLength!=(long)size) { 488ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste 
Queru fprintf(stderr, "genprops: data length %ld != calculated size %lu\n", 489ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru dataLength, (unsigned long)size); 490ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru exit(U_INTERNAL_PROGRAM_ERROR); 491ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru } 492ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru } 493ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 494ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru if(beVerbose) { 495ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru printf("data size: %6lu\n", (unsigned long)size); 496ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru } 497ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru} 498ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru 499ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru/* 500ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru * Hey, Emacs, please set the following: 501ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru * 502ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru * Local Variables: 503ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru * indent-tabs-mode: nil 504ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru * End: 505ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru * 506ac04d0bbe12b3ef54518635711412f178cb4d16Jean-Baptiste Queru */ 507