1/*
2**********************************************************************
3*   Copyright (C) 2002-2006, International Business Machines
4*   Corporation and others.  All Rights Reserved.
5**********************************************************************
6*   Date        Name        Description
7*   10/11/02    aliu        Creation.
8**********************************************************************
9*/
10
11#include "unicode/utypes.h"
12#include "unicode/putil.h"
13#include "unicode/uclean.h"
14#include "cmemory.h"
15#include "cstring.h"
16#include "filestrm.h"
17#include "uarrsort.h"
18#include "unewdata.h"
19#include "uoptions.h"
20#include "uprops.h"
21#include "propname.h"
22#include "uassert.h"
23
24#include <stdio.h>
25
26U_NAMESPACE_USE
27
28// TODO: Clean up and comment this code.
29
30//----------------------------------------------------------------------
31// BEGIN DATA
32//
33// This is the raw data to be output.  We define the data structure,
34// then include a machine-generated header that contains the actual
35// data.
36
37#include "unicode/uchar.h"
38#include "unicode/uscript.h"
39#include "unicode/unorm.h"
40
41class AliasName {
42public:
43    const char* str;
44    int32_t     index;
45
46    AliasName(const char* str, int32_t index);
47
48    int compare(const AliasName& other) const;
49
50    UBool operator==(const AliasName& other) const {
51        return compare(other) == 0;
52    }
53
54    UBool operator!=(const AliasName& other) const {
55        return compare(other) != 0;
56    }
57};
58
59AliasName::AliasName(const char* _str,
60               int32_t _index) :
61    str(_str),
62    index(_index)
63{
64}
65
66int AliasName::compare(const AliasName& other) const {
67    return uprv_comparePropertyNames(str, other.str);
68}
69
// An enum value together with the position in NAME_GROUP at which the
// names for that value begin.
class Alias {
public:
    int32_t     enumValue;
    int32_t     nameGroupIndex; // starting index into NAME_GROUP

    Alias(int32_t enumValue,
             int32_t nameGroupIndex);

    // Fills nameGroupIndices with the distinct, non-empty STRING_TABLE
    // indices of this alias's names and returns how many were stored.
    int32_t getUniqueNames(int32_t* nameGroupIndices) const;
};
80
81Alias::Alias(int32_t anEnumValue,
82                   int32_t aNameGroupIndex) :
83    enumValue(anEnumValue),
84    nameGroupIndex(aNameGroupIndex)
85{
86}
87
// An Alias that additionally carries a pointer to a list of value aliases.
class Property : public Alias {
public:
    int32_t         valueCount; // presumably the number of entries in valueList -- confirm against data.h
    const Alias* valueList;     // stored as given; not owned or copied by this class

    Property(int32_t enumValue,
                       int32_t nameGroupIndex,
                       int32_t valueCount,
                       const Alias* valueList);
};
98
99Property::Property(int32_t _enumValue,
100                                       int32_t _nameGroupIndex,
101                                       int32_t _valueCount,
102                                       const Alias* _valueList) :
103    Alias(_enumValue, _nameGroupIndex),
104    valueCount(_valueCount),
105    valueList(_valueList)
106{
107}
108
109// *** Include the data header ***
110#include "data.h"
111
112/* return a list of unique names, not including "", for this property
113 * @param stringIndices array of at least MAX_NAMES_PER_GROUP
114 * elements, will be filled with indices into STRING_TABLE
115 * @return number of indices, >= 1
116 */
117int32_t Alias::getUniqueNames(int32_t* stringIndices) const {
118    int32_t count = 0;
119    int32_t i = nameGroupIndex;
120    UBool done = FALSE;
121    while (!done) {
122        int32_t j = NAME_GROUP[i++];
123        if (j < 0) {
124            done = TRUE;
125            j = -j;
126        }
127        if (j == 0) continue; // omit "" entries
128        UBool dupe = FALSE;
129        for (int32_t k=0; k<count; ++k) {
130            if (stringIndices[k] == j) {
131                dupe = TRUE;
132                break;
133            }
134            // also do a string check for things like "age|Age"
135            if (STRING_TABLE[stringIndices[k]] == STRING_TABLE[j]) {
136                //printf("Found dupe %s|%s\n",
137                //       STRING_TABLE[stringIndices[k]].str,
138                //       STRING_TABLE[j].str);
139                dupe = TRUE;
140                break;
141            }
142        }
143        if (dupe) continue; // omit duplicates
144        stringIndices[count++] = j;
145    }
146    return count;
147}
148
149// END DATA
150//----------------------------------------------------------------------
151
// Allocate uninitialized storage for `count` objects of `type` through
// uprv_malloc() and cast the result to type*.  Both `count` and the whole
// expansion are parenthesized so that expression arguments such as
// MALLOC(T, a + b) request sizeof(T) * (a + b) bytes, and so that the
// result can be used safely inside a larger expression.
#define MALLOC(type, count) \
  ((type*) uprv_malloc(sizeof(type) * (count)))
154
// Report a fatal error on stderr, prefixed with "Error: ", and terminate
// the process with exit status 1.  Does not return.
void die(const char* msg) {
    FILE* const sink = stderr;
    fprintf(sink, "Error: %s\n", msg);
    exit(1);
}
159
160//----------------------------------------------------------------------
161
/**
 * A list of Alias objects.  Abstract interface: implementations supply
 * indexed read access and an element count.
 */
class AliasList {
public:
    virtual ~AliasList();
    // Element i of the list.
    virtual const Alias& operator[](int32_t i) const = 0;
    // Number of elements in the list.
    virtual int32_t count() const = 0;
};

// Out-of-line (empty) definition of the virtual destructor.
AliasList::~AliasList() {}
173
174/**
175 * A single array.
176 */
177class AliasArrayList : public AliasList {
178    const Alias* a;
179    int32_t n;
180public:
181    AliasArrayList(const Alias* _a, int32_t _n) {
182        a = _a;
183        n = _n;
184    }
185    virtual const Alias& operator[](int32_t i) const {
186        return a[i];
187    }
188    virtual int32_t count() const {
189        return n;
190    }
191};
192
193/**
194 * A single array.
195 */
196class PropertyArrayList : public AliasList {
197    const Property* a;
198    int32_t n;
199public:
200    PropertyArrayList(const Property* _a, int32_t _n) {
201        a = _a;
202        n = _n;
203    }
204    virtual const Alias& operator[](int32_t i) const {
205        return a[i];
206    }
207    virtual int32_t count() const {
208        return n;
209    }
210};
211
212//----------------------------------------------------------------------
213
/**
 * One row of a name index: associates a name, identified by its index,
 * with an enum value.
 */
class NameToEnumEntry {
public:
    int32_t nameIndex;
    int32_t enumValue;
    // First argument is the name index, second the enum value.
    NameToEnumEntry(int32_t name, int32_t value)
        : nameIndex(name), enumValue(value) {}
};
224
225// Sort function for NameToEnumEntry (sort by name)
226U_CFUNC int32_t
227compareNameToEnumEntry(const void * /*context*/, const void* e1, const void* e2) {
228    return
229        STRING_TABLE[((NameToEnumEntry*)e1)->nameIndex].
230            compare(STRING_TABLE[((NameToEnumEntry*)e2)->nameIndex]);
231}
232
233//----------------------------------------------------------------------
234
235/**
236 * An element in an enum index.  It maps an enum into a name group entry
237 * (given by index).
238 */
239class EnumToNameGroupEntry {
240public:
241    int32_t enumValue;
242    int32_t nameGroupIndex;
243    EnumToNameGroupEntry(int32_t a, int32_t b) { enumValue=a; nameGroupIndex=b; }
244
245    // are enumValues contiguous for count entries starting with this one?
246    // ***!!!*** we assume we are in an array and look at neighbors ***!!!***
247    UBool isContiguous(int32_t count) const {
248        const EnumToNameGroupEntry* p = this;
249        for (int32_t i=1; i<count; ++i) {
250            if (p[i].enumValue != (this->enumValue + i)) {
251                return FALSE;
252            }
253        }
254        return TRUE;
255    }
256};
257
258// Sort function for EnumToNameGroupEntry (sort by name index)
259U_CFUNC int32_t
260compareEnumToNameGroupEntry(const void * /*context*/, const void* e1, const void* e2) {
261    return ((EnumToNameGroupEntry*)e1)->enumValue - ((EnumToNameGroupEntry*)e2)->enumValue;
262}
263
264//----------------------------------------------------------------------
265
266/**
267 * An element in the map from enumerated property enums to value maps.
268 */
269class EnumToValueEntry {
270public:
271    int32_t enumValue;
272    EnumToNameGroupEntry* enumToName;
273    int32_t enumToName_count;
274    NameToEnumEntry* nameToEnum;
275    int32_t nameToEnum_count;
276
277    // are enumValues contiguous for count entries starting with this one?
278    // ***!!!*** we assume we are in an array and look at neighbors ***!!!***
279    UBool isContiguous(int32_t count) const {
280        const EnumToValueEntry* p = this;
281        for (int32_t i=1; i<count; ++i) {
282            if (p[i].enumValue != (this->enumValue + i)) {
283                return FALSE;
284            }
285        }
286        return TRUE;
287    }
288};
289
290// Sort function for EnumToValueEntry (sort by enum)
291U_CFUNC int32_t
292compareEnumToValueEntry(const void * /*context*/, const void* e1, const void* e2) {
293    return ((EnumToValueEntry*)e1)->enumValue - ((EnumToValueEntry*)e2)->enumValue;
294}
295
296//----------------------------------------------------------------------
297// BEGIN Builder
298
// True when x lies in the closed range [0, MAX_OFFSET].  Note that x is
// evaluated twice, so it should not have side effects.
#define IS_VALID_OFFSET(x) (((x)>=0)&&((x)<=MAX_OFFSET))
300
/**
 * Assembles the binary image written by this tool.  Usage, as far as the
 * members show: the build*() methods allocate the individual tables with
 * index values in their offset slots, fixup() computes where each table
 * lands and rewrites those indices into offsets, and createData() copies
 * the header and all tables into one contiguous buffer.  The numbered
 * comments (0:, 2:, ... 99:) give the order of the chunks in the output.
 */
class Builder {
    // header:
    PropertyAliases header;

    // 0:
    NonContiguousEnumToOffset* enumToName;
    int32_t enumToName_size;
    Offset enumToName_offset;

    // 1: (deleted)

    // 2:
    NameToEnum* nameToEnum;
    int32_t nameToEnum_size;
    Offset nameToEnum_offset;

    // 3:
    NonContiguousEnumToOffset* enumToValue;
    int32_t enumToValue_size;
    Offset enumToValue_offset;

    // 4:
    ValueMap* valueMap;
    int32_t valueMap_size;
    int32_t valueMap_count;
    Offset valueMap_offset;

    // for any i, one of valueEnumToName[i] or valueNCEnumToName[i] is
    // NULL and one is not.  valueEnumToName_size[i] is the size of
    // the non-NULL one.  i=0..valueMapCount-1
    // 5a:
    EnumToOffset** valueEnumToName;
    // 5b:
    NonContiguousEnumToOffset** valueNCEnumToName;
    int32_t* valueEnumToName_size;
    Offset* valueEnumToName_offset;
    // 6:
    // arrays of valueMap_count pointers, sizes, & offsets
    NameToEnum** valueNameToEnum;
    int32_t* valueNameToEnum_size;
    Offset* valueNameToEnum_offset;

    // 98:
    Offset* nameGroupPool;
    int32_t nameGroupPool_count;
    int32_t nameGroupPool_size;
    Offset nameGroupPool_offset;

    // 99:
    char* stringPool;
    int32_t stringPool_count;
    int32_t stringPool_size;
    Offset stringPool_offset;
    Offset* stringPool_offsetArray; // relative to stringPool

    int32_t total_size; // size of everything

    int32_t debug; // debug level; values > 0 make computeOffsets() print the layout

public:

    Builder(int32_t debugLevel);
    ~Builder();

    // Builds chunks 0 (enum => name group) and 2 (name => enum) for the
    // top-level property list.
    void buildTopLevelProperties(const NameToEnumEntry* propName,
                                 int32_t propNameCount,
                                 const EnumToNameGroupEntry* propEnum,
                                 int32_t propEnumCount);

    // Builds chunks 3, 4, 5 and 6 from the per-property value entries.
    void buildValues(const EnumToValueEntry* e2v,
                     int32_t count);

    // Builds chunks 98 (name group pool) and 99 (string pool).
    void buildStringPool(const AliasName* propertyNames,
                         int32_t propertyNameCount,
                         const int32_t* nameGroupIndices,
                         int32_t nameGroupIndicesCount);

    // Computes all offsets and converts stored indices into offsets.
    void fixup();

    // Returns a newly allocated buffer holding the complete image and
    // sets length to its size in bytes.
    int8_t* createData(int32_t& length) const;

private:

    static EnumToOffset* buildEnumToOffset(const EnumToNameGroupEntry* e2ng,
                                           int32_t count,
                                           int32_t& size);
    static NonContiguousEnumToOffset*
        buildNCEnumToNameGroup(const EnumToNameGroupEntry* e2ng,
                               int32_t count,
                               int32_t& size);

    static NonContiguousEnumToOffset*
        buildNCEnumToValue(const EnumToValueEntry* e2v,
                           int32_t count,
                           int32_t& size);

    static NameToEnum* buildNameToEnum(const NameToEnumEntry* nameToEnum,
                                       int32_t count,
                                       int32_t& size);

    Offset stringIndexToOffset(int32_t index, UBool allowNeg=FALSE) const;
    void fixupNameToEnum(NameToEnum* n);
    void fixupEnumToNameGroup(EnumToOffset* e2ng);
    void fixupNCEnumToNameGroup(NonContiguousEnumToOffset* e2ng);

    void computeOffsets();
    void fixupStringPoolOffsets();
    void fixupNameGroupPoolOffsets();
    void fixupMiscellaneousOffsets();

    // Rounds a up to the next multiple of sizeof(int32_t).
    static int32_t align(int32_t a);
    // Zero-fills size bytes starting at p.
    static void erase(void* p, int32_t size);
};
414
415Builder::Builder(int32_t debugLevel) {
416    debug = debugLevel;
417    enumToName = 0;
418    nameToEnum = 0;
419    enumToValue = 0;
420    valueMap_count = 0;
421    valueMap = 0;
422    valueEnumToName = 0;
423    valueNCEnumToName = 0;
424    valueEnumToName_size = 0;
425    valueEnumToName_offset = 0;
426    valueNameToEnum = 0;
427    valueNameToEnum_size = 0;
428    valueNameToEnum_offset = 0;
429    nameGroupPool = 0;
430    stringPool = 0;
431    stringPool_offsetArray = 0;
432}
433
434Builder::~Builder() {
435    uprv_free(enumToName);
436    uprv_free(nameToEnum);
437    uprv_free(enumToValue);
438    uprv_free(valueMap);
439    for (int32_t i=0; i<valueMap_count; ++i) {
440        uprv_free(valueEnumToName[i]);
441        uprv_free(valueNCEnumToName[i]);
442        uprv_free(valueNameToEnum[i]);
443    }
444    uprv_free(valueEnumToName);
445    uprv_free(valueNCEnumToName);
446    uprv_free(valueEnumToName_size);
447    uprv_free(valueEnumToName_offset);
448    uprv_free(valueNameToEnum);
449    uprv_free(valueNameToEnum_size);
450    uprv_free(valueNameToEnum_offset);
451    uprv_free(nameGroupPool);
452    uprv_free(stringPool);
453    uprv_free(stringPool_offsetArray);
454}
455
456int32_t Builder::align(int32_t a) {
457    U_ASSERT(a >= 0);
458    int32_t k = a % sizeof(int32_t);
459    if (k == 0) {
460        return a;
461    }
462    a += sizeof(int32_t) - k;
463    return a;
464}
465
466void Builder::erase(void* p, int32_t size) {
467    U_ASSERT(size >= 0);
468    int8_t* q = (int8_t*) p;
469    while (size--) {
470        *q++ = 0;
471    }
472}
473
474EnumToOffset* Builder::buildEnumToOffset(const EnumToNameGroupEntry* e2ng,
475                                         int32_t count,
476                                         int32_t& size) {
477    U_ASSERT(e2ng->isContiguous(count));
478    size = align(EnumToOffset::getSize(count));
479    EnumToOffset* result = (EnumToOffset*) uprv_malloc(size);
480    erase(result, size);
481    result->enumStart = e2ng->enumValue;
482    result->enumLimit = e2ng->enumValue + count;
483    Offset* p = result->getOffsetArray();
484    for (int32_t i=0; i<count; ++i) {
485        // set these to NGI index values
486        // fix them up to NGI offset values
487        U_ASSERT(IS_VALID_OFFSET(e2ng[i].nameGroupIndex));
488        p[i] = (Offset) e2ng[i].nameGroupIndex; // FIXUP later
489    }
490    return result;
491}
492
493NonContiguousEnumToOffset*
494Builder::buildNCEnumToNameGroup(const EnumToNameGroupEntry* e2ng,
495                                int32_t count,
496                                int32_t& size) {
497    U_ASSERT(!e2ng->isContiguous(count));
498    size = align(NonContiguousEnumToOffset::getSize(count));
499    NonContiguousEnumToOffset* nc = (NonContiguousEnumToOffset*) uprv_malloc(size);
500    erase(nc, size);
501    nc->count = count;
502    EnumValue* e = nc->getEnumArray();
503    Offset* p = nc->getOffsetArray();
504    for (int32_t i=0; i<count; ++i) {
505        // set these to NGI index values
506        // fix them up to NGI offset values
507        e[i] = e2ng[i].enumValue;
508        U_ASSERT(IS_VALID_OFFSET(e2ng[i].nameGroupIndex));
509        p[i] = (Offset) e2ng[i].nameGroupIndex; // FIXUP later
510    }
511    return nc;
512}
513
514NonContiguousEnumToOffset*
515Builder::buildNCEnumToValue(const EnumToValueEntry* e2v,
516                            int32_t count,
517                            int32_t& size) {
518    U_ASSERT(!e2v->isContiguous(count));
519    size = align(NonContiguousEnumToOffset::getSize(count));
520    NonContiguousEnumToOffset* result = (NonContiguousEnumToOffset*) uprv_malloc(size);
521    erase(result, size);
522    result->count = count;
523    EnumValue* e = result->getEnumArray();
524    for (int32_t i=0; i<count; ++i) {
525        e[i] = e2v[i].enumValue;
526        // offset must be set later
527    }
528    return result;
529}
530
531/**
532 * Given an index into the string pool, return an offset.  computeOffsets()
533 * must have been called already.  If allowNegative is true, allow negatives
534 * and preserve their sign.
535 */
536Offset Builder::stringIndexToOffset(int32_t index, UBool allowNegative) const {
537    // Index 0 is ""; we turn this into an Offset of zero
538    if (index == 0) return 0;
539    if (index < 0) {
540        if (allowNegative) {
541            return -Builder::stringIndexToOffset(-index);
542        } else {
543            die("Negative string pool index");
544        }
545    } else {
546        if (index >= stringPool_count) {
547            die("String pool index too large");
548        }
549        Offset result = stringPool_offset + stringPool_offsetArray[index];
550        U_ASSERT(result >= 0 && result < total_size);
551        return result;
552    }
553    return 0; // never executed; make compiler happy
554}
555
556NameToEnum* Builder::buildNameToEnum(const NameToEnumEntry* nameToEnum,
557                                     int32_t count,
558                                     int32_t& size) {
559    size = align(NameToEnum::getSize(count));
560    NameToEnum* n2e = (NameToEnum*) uprv_malloc(size);
561    erase(n2e, size);
562    n2e->count = count;
563    Offset* p = n2e->getNameArray();
564    EnumValue* e = n2e->getEnumArray();
565    for (int32_t i=0; i<count; ++i) {
566        // set these to SP index values
567        // fix them up to SP offset values
568        U_ASSERT(IS_VALID_OFFSET(nameToEnum[i].nameIndex));
569        p[i] = (Offset) nameToEnum[i].nameIndex; // FIXUP later
570        e[i] = nameToEnum[i].enumValue;
571    }
572    return n2e;
573}
574
575
576void Builder::buildTopLevelProperties(const NameToEnumEntry* propName,
577                                      int32_t propNameCount,
578                                      const EnumToNameGroupEntry* propEnum,
579                                      int32_t propEnumCount) {
580    enumToName = buildNCEnumToNameGroup(propEnum,
581                                        propEnumCount,
582                                        enumToName_size);
583    nameToEnum = buildNameToEnum(propName,
584                                 propNameCount,
585                                 nameToEnum_size);
586}
587
588void Builder::buildValues(const EnumToValueEntry* e2v,
589                          int32_t count) {
590    int32_t i;
591
592    U_ASSERT(!e2v->isContiguous(count));
593
594    valueMap_count = count;
595
596    enumToValue = buildNCEnumToValue(e2v, count,
597                                     enumToValue_size);
598
599    valueMap_size = align(count * sizeof(ValueMap));
600    valueMap = (ValueMap*) uprv_malloc(valueMap_size);
601    erase(valueMap, valueMap_size);
602
603    valueEnumToName = MALLOC(EnumToOffset*, count);
604    valueNCEnumToName = MALLOC(NonContiguousEnumToOffset*, count);
605    valueEnumToName_size = MALLOC(int32_t, count);
606    valueEnumToName_offset = MALLOC(Offset, count);
607    valueNameToEnum = MALLOC(NameToEnum*, count);
608    valueNameToEnum_size = MALLOC(int32_t, count);
609    valueNameToEnum_offset = MALLOC(Offset, count);
610
611    for (i=0; i<count; ++i) {
612        UBool isContiguous =
613            e2v[i].enumToName->isContiguous(e2v[i].enumToName_count);
614        valueEnumToName[i] = 0;
615        valueNCEnumToName[i] = 0;
616        if (isContiguous) {
617            valueEnumToName[i] = buildEnumToOffset(e2v[i].enumToName,
618                                                   e2v[i].enumToName_count,
619                                                   valueEnumToName_size[i]);
620        } else {
621            valueNCEnumToName[i] = buildNCEnumToNameGroup(e2v[i].enumToName,
622                                                          e2v[i].enumToName_count,
623                                                          valueEnumToName_size[i]);
624        }
625        valueNameToEnum[i] =
626            buildNameToEnum(e2v[i].nameToEnum,
627                            e2v[i].nameToEnum_count,
628                            valueNameToEnum_size[i]);
629    }
630}
631
632void Builder::buildStringPool(const AliasName* propertyNames,
633                              int32_t propertyNameCount,
634                              const int32_t* nameGroupIndices,
635                              int32_t nameGroupIndicesCount) {
636    int32_t i;
637
638    nameGroupPool_count = nameGroupIndicesCount;
639    nameGroupPool_size = sizeof(Offset) * nameGroupPool_count;
640    nameGroupPool = MALLOC(Offset, nameGroupPool_count);
641
642    for (i=0; i<nameGroupPool_count; ++i) {
643        // Some indices are negative.
644        int32_t a = nameGroupIndices[i];
645        if (a < 0) a = -a;
646        U_ASSERT(IS_VALID_OFFSET(a));
647        nameGroupPool[i] = (Offset) nameGroupIndices[i];
648    }
649
650    stringPool_count = propertyNameCount;
651    stringPool_size = 0;
652    // first string must be "" -- we skip it
653    U_ASSERT(*propertyNames[0].str == 0);
654    for (i=1 /*sic*/; i<propertyNameCount; ++i) {
655        stringPool_size += (int32_t)(uprv_strlen(propertyNames[i].str) + 1);
656    }
657    stringPool = MALLOC(char, stringPool_size);
658    stringPool_offsetArray = MALLOC(Offset, stringPool_count);
659    Offset soFar = 0;
660    char* p = stringPool;
661    stringPool_offsetArray[0] = -1; // we don't use this entry
662    for (i=1 /*sic*/; i<propertyNameCount; ++i) {
663        const char* str = propertyNames[i].str;
664        int32_t len = (int32_t)uprv_strlen(str);
665        uprv_strcpy(p, str);
666        p += len;
667        *p++ = 0;
668        stringPool_offsetArray[i] = soFar;
669        soFar += (Offset)(len+1);
670    }
671    U_ASSERT(soFar == stringPool_size);
672    U_ASSERT(p == (stringPool + stringPool_size));
673}
674
// Compile-time check that PropertyAliases is a POD (plain old data; see
// the C++ standard).  Under the language rules this file was written for,
// a union member must be a POD, so the following union will _fail to
// compile_ if PropertyAliases is _not_ one.  The type is never
// instantiated.  (Note: We used to use the offsetof macro to check this,
// but that's not quite right, so that test is commented out -- see the
// commented-out U_ASSERT in computeOffsets() below.)
typedef union {
    int32_t i;
    PropertyAliases p;
} PropertyAliasesPODTest;
684
// Assigns every chunk its byte offset within the output image, in output
// order: header, 0, 2, 3, 4, then (5, 6) for each value map, then 98 and
// 99.  Each *_offset member is set to the running offset and the running
// offset is advanced by the chunk's *_size.  total_size ends up as the
// size of the whole image.  With debug > 0 the layout is printed.
void Builder::computeOffsets() {
    int32_t i;
    Offset off = sizeof(header); // running offset; the header comes first

    if (debug>0) {
        printf("header   \t offset=%4d  size=%5d\n", 0, off);
    }

    // PropertyAliases must have no v-table and must be
    // padded (if necessary) to the next 32-bit boundary.
    //U_ASSERT(offsetof(PropertyAliases, enumToName_offset) == 0); // see above
    U_ASSERT(sizeof(header) % sizeof(int32_t) == 0);

    // COMPUTE_OFFSET(foo): record `off` in foo_offset, assert that the
    // chunk still fits and is int32_t-aligned, then advance `off` by
    // foo_size.  COMPUTE_OFFSET2 lets the caller pick the alignment type.
    // Both macros expand to several statements, so they must only be
    // used as complete statements at block level, as below.
    #define COMPUTE_OFFSET(foo) COMPUTE_OFFSET2(foo,int32_t)

    #define COMPUTE_OFFSET2(foo,type) \
      if (debug>0)\
        printf(#foo "\t offset=%4d  size=%5d\n", off, (int)foo##_size);\
      foo##_offset = off;\
      U_ASSERT(IS_VALID_OFFSET(off + foo##_size));\
      U_ASSERT(foo##_offset % sizeof(type) == 0);\
      off = (Offset) (off + foo##_size);

    COMPUTE_OFFSET(enumToName);     // 0:
    COMPUTE_OFFSET(nameToEnum);     // 2:
    COMPUTE_OFFSET(enumToValue);    // 3:
    COMPUTE_OFFSET(valueMap);       // 4:

    // Per value map: its enum => name table, then its name => enum table.
    for (i=0; i<valueMap_count; ++i) {
        if (debug>0) {
            printf(" enumToName[%d]\t offset=%4d  size=%5d\n",
                   (int)i, off, (int)valueEnumToName_size[i]);
        }

        valueEnumToName_offset[i] = off;   // 5:
        U_ASSERT(IS_VALID_OFFSET(off + valueEnumToName_size[i]));
        off = (Offset) (off + valueEnumToName_size[i]);

        if (debug>0) {
            printf(" nameToEnum[%d]\t offset=%4d  size=%5d\n",
                   (int)i, off, (int)valueNameToEnum_size[i]);
        }

        valueNameToEnum_offset[i] = off;   // 6:
        U_ASSERT(IS_VALID_OFFSET(off + valueNameToEnum_size[i]));
        off = (Offset) (off + valueNameToEnum_size[i]);
    }

    // These last two chunks have weaker alignment needs
    COMPUTE_OFFSET2(nameGroupPool,Offset); // 98:
    COMPUTE_OFFSET2(stringPool,char);      // 99:

    total_size = off;
    if (debug>0) printf("total                         size=%5d\n\n", (int)total_size);
    U_ASSERT(total_size <= (MAX_OFFSET+1));
}
741
742void Builder::fixupNameToEnum(NameToEnum* n) {
743    // Fix the string pool offsets in n
744    Offset* p = n->getNameArray();
745    for (int32_t i=0; i<n->count; ++i) {
746        p[i] = stringIndexToOffset(p[i]);
747    }
748}
749
750void Builder::fixupStringPoolOffsets() {
751    int32_t i;
752
753    // 2:
754    fixupNameToEnum(nameToEnum);
755
756    // 6:
757    for (i=0; i<valueMap_count; ++i) {
758        fixupNameToEnum(valueNameToEnum[i]);
759    }
760
761    // 98:
762    for (i=0; i<nameGroupPool_count; ++i) {
763        nameGroupPool[i] = stringIndexToOffset(nameGroupPool[i], TRUE);
764    }
765}
766
767void Builder::fixupEnumToNameGroup(EnumToOffset* e2ng) {
768    EnumValue i;
769    int32_t j;
770    Offset* p = e2ng->getOffsetArray();
771    for (i=e2ng->enumStart, j=0; i<e2ng->enumLimit; ++i, ++j) {
772        p[j] = nameGroupPool_offset + sizeof(Offset) * p[j];
773    }
774}
775
776void Builder::fixupNCEnumToNameGroup(NonContiguousEnumToOffset* e2ng) {
777    int32_t i;
778    /*EnumValue* e = e2ng->getEnumArray();*/
779    Offset* p = e2ng->getOffsetArray();
780    for (i=0; i<e2ng->count; ++i) {
781        p[i] = nameGroupPool_offset + sizeof(Offset) * p[i];
782    }
783}
784
785void Builder::fixupNameGroupPoolOffsets() {
786    int32_t i;
787
788    // 0:
789    fixupNCEnumToNameGroup(enumToName);
790
791    // 1: (deleted)
792
793    // 5:
794    for (i=0; i<valueMap_count; ++i) {
795        // 5a:
796        if (valueEnumToName[i] != 0) {
797            fixupEnumToNameGroup(valueEnumToName[i]);
798        }
799        // 5b:
800        if (valueNCEnumToName[i] != 0) {
801            fixupNCEnumToNameGroup(valueNCEnumToName[i]);
802        }
803    }
804}
805
806void Builder::fixupMiscellaneousOffsets() {
807    int32_t i;
808
809    // header:
810    erase(&header, sizeof(header));
811    header.enumToName_offset = enumToName_offset;
812    header.nameToEnum_offset = nameToEnum_offset;
813    header.enumToValue_offset = enumToValue_offset;
814    // header meta-info used by Java:
815    U_ASSERT(total_size > 0 && total_size < 0x7FFF);
816    header.total_size = (int16_t) total_size;
817    header.valueMap_offset = valueMap_offset;
818    header.valueMap_count = (int16_t) valueMap_count;
819    header.nameGroupPool_offset = nameGroupPool_offset;
820    header.nameGroupPool_count = (int16_t) nameGroupPool_count;
821    header.stringPool_offset = stringPool_offset;
822    header.stringPool_count = (int16_t) stringPool_count - 1; // don't include "" entry
823
824    U_ASSERT(valueMap_count <= 0x7FFF);
825    U_ASSERT(nameGroupPool_count <= 0x7FFF);
826    U_ASSERT(stringPool_count <= 0x7FFF);
827
828    // 3:
829    Offset* p = enumToValue->getOffsetArray();
830    /*EnumValue* e = enumToValue->getEnumArray();*/
831    U_ASSERT(valueMap_count == enumToValue->count);
832    for (i=0; i<valueMap_count; ++i) {
833        p[i] = (Offset)(valueMap_offset + sizeof(ValueMap) * i);
834    }
835
836    // 4:
837    for (i=0; i<valueMap_count; ++i) {
838        ValueMap& v = valueMap[i];
839        v.enumToName_offset = v.ncEnumToName_offset = 0;
840        if (valueEnumToName[i] != 0) {
841            v.enumToName_offset = valueEnumToName_offset[i];
842        }
843        if (valueNCEnumToName[i] != 0) {
844            v.ncEnumToName_offset = valueEnumToName_offset[i];
845        }
846        v.nameToEnum_offset = valueNameToEnum_offset[i];
847    }
848}
849
// Runs the whole fix-up pass.  The order matters: offsets must be
// computed first because every later step reads them (for example
// stringIndexToOffset() uses stringPool_offset, and the name group
// fix-ups use nameGroupPool_offset).
void Builder::fixup() {
    computeOffsets();
    fixupStringPoolOffsets();
    fixupNameGroupPoolOffsets();
    fixupMiscellaneousOffsets();
}
856
857int8_t* Builder::createData(int32_t& length) const {
858    length = total_size;
859    int8_t* result = MALLOC(int8_t, length);
860
861    int8_t* p = result;
862    int8_t* limit = result + length;
863
864    #define APPEND2(x, size)   \
865      U_ASSERT((p+size)<=limit); \
866      uprv_memcpy(p, x, size); \
867      p += size
868
869    #define APPEND(x) APPEND2(x, x##_size)
870
871    APPEND2(&header, sizeof(header));
872    APPEND(enumToName);
873    APPEND(nameToEnum);
874    APPEND(enumToValue);
875    APPEND(valueMap);
876
877    for (int32_t i=0; i<valueMap_count; ++i) {
878        U_ASSERT((valueEnumToName[i] != 0 && valueNCEnumToName[i] == 0) ||
879               (valueEnumToName[i] == 0 && valueNCEnumToName[i] != 0));
880        if (valueEnumToName[i] != 0) {
881            APPEND2(valueEnumToName[i], valueEnumToName_size[i]);
882        }
883        if (valueNCEnumToName[i] != 0) {
884            APPEND2(valueNCEnumToName[i], valueEnumToName_size[i]);
885        }
886        APPEND2(valueNameToEnum[i], valueNameToEnum_size[i]);
887    }
888
889    APPEND(nameGroupPool);
890    APPEND(stringPool);
891
892    if (p != limit) {
893        fprintf(stderr, "p != limit; p = %p, limit = %p", p, limit);
894        exit(1);
895    }
896    return result;
897}
898
899// END Builder
900//----------------------------------------------------------------------
901
/* UDataInfo cf. udata.h */
/* Describes the data this tool produces: the platform properties of the
 * build machine (endianness, charset family, UChar size), the data format
 * signature and version, and the Unicode version of the data.
 * NOTE(review): the meaning of the two literal 0 fields is not visible in
 * this file -- see the UDataInfo declaration in udata.h. */
static UDataInfo dataInfo = {
    sizeof(UDataInfo),
    0,

    U_IS_BIG_ENDIAN,
    U_CHARSET_FAMILY,
    sizeof(UChar),
    0,

    {PNAME_SIG_0, PNAME_SIG_1, PNAME_SIG_2, PNAME_SIG_3},
    {PNAME_FORMAT_VERSION, 0, 0, 0},                 /* formatVersion */
    {VERSION_0, VERSION_1, VERSION_2, VERSION_3} /* Unicode version */
};
916
// The tool itself: holds the command-line settings and the steps that
// turn alias lists into indices and write the data file.  main() creates
// one instance and calls MMain() on it.
class genpname {

    // command-line options
    UBool useCopyright;
    UBool verbose;
    int32_t debug;

public:
    // Entry point proper; its return value becomes the process exit code.
    int      MMain(int argc, char *argv[]);

private:
    // Builds the name => enum index for list, sorted by name; sets
    // nameIndexCount to the number of entries in the returned array.
    NameToEnumEntry* createNameIndex(const AliasList& list,
                                     int32_t& nameIndexCount);

    // Builds the enum => name group index for list.
    EnumToNameGroupEntry* createEnumIndex(const AliasList& list);

    // Writes the data assembled by the Builder into destdir.
    // NOTE(review): meaning of the return value is not visible here.
    int32_t  writeDataFile(const char *destdir, const Builder&);
};
935
936int main(int argc, char *argv[]) {
937    UErrorCode status = U_ZERO_ERROR;
938    u_init(&status);
939    if (U_FAILURE(status) && status != U_FILE_ACCESS_ERROR) {
940        // Note: u_init() will try to open ICU property data.
941        //       failures here are expected when building ICU from scratch.
942        //       ignore them.
943        fprintf(stderr, "genpname: can not initialize ICU.  Status = %s\n",
944            u_errorName(status));
945        exit(1);
946    }
947
948    genpname app;
949    U_MAIN_INIT_ARGS(argc, argv);
950    int retVal = app.MMain(argc, argv);
951    u_cleanup();
952    return retVal;
953}
954
// Command-line options understood by the tool: help (-h and -?),
// copyright, destination directory, verbose, and a "debug" option (-D)
// that requires an argument.
// NOTE(review): code that reads this table presumably refers to entries
// by position, so keep the order stable -- confirm against MMain().
static UOption options[]={
    UOPTION_HELP_H,
    UOPTION_HELP_QUESTION_MARK,
    UOPTION_COPYRIGHT,
    UOPTION_DESTDIR,
    UOPTION_VERBOSE,
    UOPTION_DEF("debug", 'D', UOPT_REQUIRES_ARG),
};
963
964NameToEnumEntry* genpname::createNameIndex(const AliasList& list,
965                                           int32_t& nameIndexCount) {
966
967    // Build name => enum map
968
969    // This is an n->1 map.  There are typically multiple names
970    // mapping to one enum.  The name index is sorted in order of the name,
971    // as defined by the uprv_compareAliasNames() function.
972
973    int32_t i, j;
974    int32_t count = list.count();
975
976    // compute upper limit on number of names in the index
977    int32_t nameIndexCapacity = count * MAX_NAMES_PER_GROUP;
978    NameToEnumEntry* nameIndex = MALLOC(NameToEnumEntry, nameIndexCapacity);
979
980    nameIndexCount = 0;
981    int32_t names[MAX_NAMES_PER_GROUP];
982    for (i=0; i<count; ++i) {
983        const Alias& p = list[i];
984        int32_t n = p.getUniqueNames(names);
985        for (j=0; j<n; ++j) {
986            U_ASSERT(nameIndexCount < nameIndexCapacity);
987            nameIndex[nameIndexCount++] =
988                NameToEnumEntry(names[j], p.enumValue);
989        }
990    }
991
992    /*
993     * use a stable sort to ensure consistent results between
994     * genpname.cpp and the propname.cpp swapping code
995     */
996    UErrorCode errorCode = U_ZERO_ERROR;
997    uprv_sortArray(nameIndex, nameIndexCount, sizeof(nameIndex[0]),
998                   compareNameToEnumEntry, NULL, TRUE, &errorCode);
999    if (debug>1) {
1000        printf("Alias names: %d\n", (int)nameIndexCount);
1001        for (i=0; i<nameIndexCount; ++i) {
1002            printf("%s => %d\n",
1003                   STRING_TABLE[nameIndex[i].nameIndex].str,
1004                   (int)nameIndex[i].enumValue);
1005        }
1006        printf("\n");
1007    }
1008    // make sure there are no duplicates.  for a sorted list we need
1009    // only compare adjacent items.  Alias.getUniqueNames() has
1010    // already eliminated duplicate names for a single property, which
1011    // does occur, so we're checking for duplicate names between two
1012    // properties, which should never occur.
1013    UBool ok = TRUE;
1014    for (i=1; i<nameIndexCount; ++i) {
1015        if (STRING_TABLE[nameIndex[i-1].nameIndex] ==
1016            STRING_TABLE[nameIndex[i].nameIndex]) {
1017            printf("Error: Duplicate names in property list: \"%s\", \"%s\"\n",
1018                   STRING_TABLE[nameIndex[i-1].nameIndex].str,
1019                   STRING_TABLE[nameIndex[i].nameIndex].str);
1020            ok = FALSE;
1021        }
1022    }
1023    if (!ok) {
1024        die("Two or more duplicate names in property list");
1025    }
1026
1027    return nameIndex;
1028}
1029
1030EnumToNameGroupEntry* genpname::createEnumIndex(const AliasList& list) {
1031
1032    // Build the enum => name map
1033
1034    // This is a 1->n map.  Each enum maps to 1 or more names.  To
1035    // accomplish this the index entry points to an element of the
1036    // NAME_GROUP array.  This is the short name (which may be empty).
1037    // From there, subsequent elements of NAME_GROUP are alternate
1038    // names for this enum, up to and including the first one that is
1039    // negative (negate for actual index).
1040
1041    int32_t i, j, k;
1042    int32_t count = list.count();
1043
1044    EnumToNameGroupEntry* enumIndex = MALLOC(EnumToNameGroupEntry, count);
1045    for (i=0; i<count; ++i) {
1046        const Alias& p = list[i];
1047        enumIndex[i] = EnumToNameGroupEntry(p.enumValue, p.nameGroupIndex);
1048    }
1049
1050    UErrorCode errorCode = U_ZERO_ERROR;
1051    uprv_sortArray(enumIndex, count, sizeof(enumIndex[0]),
1052                   compareEnumToNameGroupEntry, NULL, FALSE, &errorCode);
1053    if (debug>1) {
1054        printf("Property enums: %d\n", (int)count);
1055        for (i=0; i<count; ++i) {
1056            printf("%d => %d: ",
1057                   (int)enumIndex[i].enumValue,
1058                   (int)enumIndex[i].nameGroupIndex);
1059            UBool done = FALSE;
1060            for (j=enumIndex[i].nameGroupIndex; !done; ++j) {
1061                k = NAME_GROUP[j];
1062                if (k < 0) {
1063                    k = -k;
1064                    done = TRUE;
1065                }
1066                printf("\"%s\"", STRING_TABLE[k].str);
1067                if (!done) printf(", ");
1068            }
1069            printf("\n");
1070        }
1071        printf("\n");
1072    }
1073    return enumIndex;
1074}
1075
1076int genpname::MMain(int argc, char* argv[])
1077{
1078    int32_t i, j;
1079    UErrorCode status = U_ZERO_ERROR;
1080
1081    u_init(&status);
1082    if (U_FAILURE(status) && status != U_FILE_ACCESS_ERROR) {
1083        fprintf(stderr, "Error: u_init returned %s\n", u_errorName(status));
1084        status = U_ZERO_ERROR;
1085    }
1086
1087
1088    /* preset then read command line options */
1089    options[3].value=u_getDataDirectory();
1090    argc=u_parseArgs(argc, argv, sizeof(options)/sizeof(options[0]), options);
1091
1092    /* error handling, printing usage message */
1093    if (argc<0) {
1094        fprintf(stderr,
1095            "error in command line argument \"%s\"\n",
1096            argv[-argc]);
1097    }
1098
1099    debug = options[5].doesOccur ? (*options[5].value - '0') : 0;
1100
1101    if (argc!=1 || options[0].doesOccur || options[1].doesOccur ||
1102       debug < 0 || debug > 9) {
1103        fprintf(stderr,
1104            "usage: %s [-options]\n"
1105            "\tcreate " PNAME_DATA_NAME "." PNAME_DATA_TYPE "\n"
1106            "options:\n"
1107            "\t-h or -? or --help  this usage text\n"
1108            "\t-v or --verbose     turn on verbose output\n"
1109            "\t-c or --copyright   include a copyright notice\n"
1110            "\t-d or --destdir     destination directory, followed by the path\n"
1111            "\t-D or --debug 0..9  emit debugging messages (if > 0)\n",
1112            argv[0]);
1113        return argc<0 ? U_ILLEGAL_ARGUMENT_ERROR : U_ZERO_ERROR;
1114    }
1115
1116    /* get the options values */
1117    useCopyright=options[2].doesOccur;
1118    verbose = options[4].doesOccur;
1119
1120    // ------------------------------------------------------------
1121    // Do not sort the string table, instead keep it in data.h order.
1122    // This simplifies data swapping and testing thereof because the string
1123    // table itself need not be sorted during swapping.
1124    // The NameToEnum sorter sorts each such map's string offsets instead.
1125
1126    if (debug>1) {
1127        printf("String pool: %d\n", (int)STRING_COUNT);
1128        for (i=0; i<STRING_COUNT; ++i) {
1129            if (i != 0) {
1130                printf(", ");
1131            }
1132            printf("%s (%d)", STRING_TABLE[i].str, (int)STRING_TABLE[i].index);
1133        }
1134        printf("\n\n");
1135    }
1136
1137    // ------------------------------------------------------------
1138    // Create top-level property indices
1139
1140    PropertyArrayList props(PROPERTY, PROPERTY_COUNT);
1141    int32_t propNameCount;
1142    NameToEnumEntry* propName = createNameIndex(props, propNameCount);
1143    EnumToNameGroupEntry* propEnum = createEnumIndex(props);
1144
1145    // ------------------------------------------------------------
1146    // Create indices for the value list for each enumerated property
1147
1148    // This will have more entries than we need...
1149    EnumToValueEntry* enumToValue = MALLOC(EnumToValueEntry, PROPERTY_COUNT);
1150    int32_t enumToValue_count = 0;
1151    for (i=0, j=0; i<PROPERTY_COUNT; ++i) {
1152        if (PROPERTY[i].valueCount == 0) continue;
1153        AliasArrayList values(PROPERTY[i].valueList,
1154                              PROPERTY[i].valueCount);
1155        enumToValue[j].enumValue = PROPERTY[i].enumValue;
1156        enumToValue[j].enumToName = createEnumIndex(values);
1157        enumToValue[j].enumToName_count = PROPERTY[i].valueCount;
1158        enumToValue[j].nameToEnum = createNameIndex(values,
1159                                                    enumToValue[j].nameToEnum_count);
1160        ++j;
1161    }
1162    enumToValue_count = j;
1163
1164    uprv_sortArray(enumToValue, enumToValue_count, sizeof(enumToValue[0]),
1165                   compareEnumToValueEntry, NULL, FALSE, &status);
1166
1167    // ------------------------------------------------------------
1168    // Build PropertyAliases layout in memory
1169
1170    Builder builder(debug);
1171
1172    builder.buildTopLevelProperties(propName,
1173                                    propNameCount,
1174                                    propEnum,
1175                                    PROPERTY_COUNT);
1176
1177    builder.buildValues(enumToValue,
1178                        enumToValue_count);
1179
1180    builder.buildStringPool(STRING_TABLE,
1181                            STRING_COUNT,
1182                            NAME_GROUP,
1183                            NAME_GROUP_COUNT);
1184
1185    builder.fixup();
1186
1187    ////////////////////////////////////////////////////////////
1188    // Write the output file
1189    ////////////////////////////////////////////////////////////
1190    int32_t wlen = writeDataFile(options[3].value, builder);
1191    if (verbose) {
1192        fprintf(stdout, "Output file: %s.%s, %ld bytes\n",
1193            U_ICUDATA_NAME "_" PNAME_DATA_NAME, PNAME_DATA_TYPE, (long)wlen);
1194    }
1195
1196    return 0; // success
1197}
1198
1199int32_t genpname::writeDataFile(const char *destdir, const Builder& builder) {
1200    int32_t length;
1201    int8_t* data = builder.createData(length);
1202
1203    UNewDataMemory *pdata;
1204    UErrorCode status = U_ZERO_ERROR;
1205
1206    pdata = udata_create(destdir, PNAME_DATA_TYPE, PNAME_DATA_NAME, &dataInfo,
1207                         useCopyright ? U_COPYRIGHT_STRING : 0, &status);
1208    if (U_FAILURE(status)) {
1209        die("Unable to create data memory");
1210    }
1211
1212    udata_writeBlock(pdata, data, length);
1213
1214    int32_t dataLength = (int32_t) udata_finish(pdata, &status);
1215    if (U_FAILURE(status)) {
1216        die("Error writing output file");
1217    }
1218    if (dataLength != length) {
1219        die("Written file doesn't match expected size");
1220    }
1221
1222    return dataLength;
1223}
1224
1225//eof
1226