1/*
2**********************************************************************
3*   Copyright (C) 2002-2010, International Business Machines
4*   Corporation and others.  All Rights Reserved.
5**********************************************************************
6*   Date        Name        Description
7*   10/11/02    aliu        Creation.
8**********************************************************************
9*/
10
11#include "unicode/utypes.h"
12#include "unicode/putil.h"
13#include "unicode/uclean.h"
14#include "cmemory.h"
15#include "cstring.h"
16#include "filestrm.h"
17#include "uarrsort.h"
18#include "unewdata.h"
19#include "uoptions.h"
20#include "uprops.h"
21#include "propname.h"
22#include "uassert.h"
23
24#include <stdio.h>
25
26U_NAMESPACE_USE
27
28// TODO: Clean up and comment this code.
29
30//----------------------------------------------------------------------
31// BEGIN DATA
32//
33// This is the raw data to be output.  We define the data structure,
34// then include a machine-generated header that contains the actual
35// data.
36
37#include "unicode/uchar.h"
38#include "unicode/uscript.h"
39#include "unicode/unorm.h"
40#include "unicode/unorm2.h"
41
42class AliasName {
43public:
44    const char* str;
45    int32_t     index;
46
47    AliasName(const char* str, int32_t index);
48
49    int compare(const AliasName& other) const;
50
51    UBool operator==(const AliasName& other) const {
52        return compare(other) == 0;
53    }
54
55    UBool operator!=(const AliasName& other) const {
56        return compare(other) != 0;
57    }
58};
59
60AliasName::AliasName(const char* _str,
61               int32_t _index) :
62    str(_str),
63    index(_index)
64{
65}
66
67int AliasName::compare(const AliasName& other) const {
68    return uprv_comparePropertyNames(str, other.str);
69}
70
/**
 * An enum value together with the index of its name group (the
 * starting position of its names inside NAME_GROUP, as consumed by
 * getUniqueNames()).
 */
class Alias {
public:
    int32_t     enumValue;
    int32_t     nameGroupIndex;

    Alias(int32_t enumValue, int32_t nameGroupIndex);

    // Collects the distinct, non-empty names of this alias; defined
    // after the generated data tables have been included.
    int32_t getUniqueNames(int32_t* nameGroupIndices) const;
};

Alias::Alias(int32_t value, int32_t groupIndex)
    : enumValue(value), nameGroupIndex(groupIndex)
{
}
88
89class Property : public Alias {
90public:
91    int32_t         valueCount;
92    const Alias* valueList;
93
94    Property(int32_t enumValue,
95                       int32_t nameGroupIndex,
96                       int32_t valueCount,
97                       const Alias* valueList);
98};
99
100Property::Property(int32_t _enumValue,
101                                       int32_t _nameGroupIndex,
102                                       int32_t _valueCount,
103                                       const Alias* _valueList) :
104    Alias(_enumValue, _nameGroupIndex),
105    valueCount(_valueCount),
106    valueList(_valueList)
107{
108}
109
110// *** Include the data header ***
111#include "data.h"
112
113/* return a list of unique names, not including "", for this property
114 * @param stringIndices array of at least MAX_NAMES_PER_GROUP
115 * elements, will be filled with indices into STRING_TABLE
116 * @return number of indices, >= 1
117 */
118int32_t Alias::getUniqueNames(int32_t* stringIndices) const {
119    int32_t count = 0;
120    int32_t i = nameGroupIndex;
121    UBool done = FALSE;
122    while (!done) {
123        int32_t j = NAME_GROUP[i++];
124        if (j < 0) {
125            done = TRUE;
126            j = -j;
127        }
128        if (j == 0) continue; // omit "" entries
129        UBool dupe = FALSE;
130        for (int32_t k=0; k<count; ++k) {
131            if (stringIndices[k] == j) {
132                dupe = TRUE;
133                break;
134            }
135            // also do a string check for things like "age|Age"
136            if (STRING_TABLE[stringIndices[k]] == STRING_TABLE[j]) {
137                //printf("Found dupe %s|%s\n",
138                //       STRING_TABLE[stringIndices[k]].str,
139                //       STRING_TABLE[j].str);
140                dupe = TRUE;
141                break;
142            }
143        }
144        if (dupe) continue; // omit duplicates
145        stringIndices[count++] = j;
146    }
147    return count;
148}
149
150// END DATA
151//----------------------------------------------------------------------
152
// Allocate an uninitialized array of `count` objects of `type` with
// uprv_malloc().  Both the count argument and the whole expansion are
// parenthesized so that expression arguments such as MALLOC(T, a + b)
// allocate sizeof(T) * (a + b) bytes rather than sizeof(T) * a + b.
#define MALLOC(type, count) \
  ((type*) uprv_malloc(sizeof(type) * (count)))
155
// Report a fatal error on stderr as "Error: <msg>" and terminate the
// process with exit status 1.  Does not return.
void die(const char* msg) {
    static const int kFailureStatus = 1;
    fprintf(stderr, "Error: %s\n", msg);
    exit(kFailureStatus);
}
160
161//----------------------------------------------------------------------
162
163/**
164 * A list of Alias objects.
165 */
166class AliasList {
167public:
168    virtual ~AliasList();
169    virtual const Alias& operator[](int32_t i) const = 0;
170    virtual int32_t count() const = 0;
171};
172
173AliasList::~AliasList() {}
174
175/**
176 * A single array.
177 */
178class AliasArrayList : public AliasList {
179    const Alias* a;
180    int32_t n;
181public:
182    AliasArrayList(const Alias* _a, int32_t _n) {
183        a = _a;
184        n = _n;
185    }
186    virtual const Alias& operator[](int32_t i) const {
187        return a[i];
188    }
189    virtual int32_t count() const {
190        return n;
191    }
192};
193
194/**
195 * A single array.
196 */
197class PropertyArrayList : public AliasList {
198    const Property* a;
199    int32_t n;
200public:
201    PropertyArrayList(const Property* _a, int32_t _n) {
202        a = _a;
203        n = _n;
204    }
205    virtual const Alias& operator[](int32_t i) const {
206        return a[i];
207    }
208    virtual int32_t count() const {
209        return n;
210    }
211};
212
213//----------------------------------------------------------------------
214
215/**
216 * An element in a name index.  It maps a name (given by index) into
217 * an enum value.
218 */
219class NameToEnumEntry {
220public:
221    int32_t nameIndex;
222    int32_t enumValue;
223    NameToEnumEntry(int32_t a, int32_t b) { nameIndex=a; enumValue=b; }
224};
225
226// Sort function for NameToEnumEntry (sort by name)
227U_CFUNC int32_t
228compareNameToEnumEntry(const void * /*context*/, const void* e1, const void* e2) {
229    return
230        STRING_TABLE[((NameToEnumEntry*)e1)->nameIndex].
231            compare(STRING_TABLE[((NameToEnumEntry*)e2)->nameIndex]);
232}
233
234//----------------------------------------------------------------------
235
236/**
237 * An element in an enum index.  It maps an enum into a name group entry
238 * (given by index).
239 */
240class EnumToNameGroupEntry {
241public:
242    int32_t enumValue;
243    int32_t nameGroupIndex;
244    EnumToNameGroupEntry(int32_t a, int32_t b) { enumValue=a; nameGroupIndex=b; }
245
246    // are enumValues contiguous for count entries starting with this one?
247    // ***!!!*** we assume we are in an array and look at neighbors ***!!!***
248    UBool isContiguous(int32_t count) const {
249        const EnumToNameGroupEntry* p = this;
250        for (int32_t i=1; i<count; ++i) {
251            if (p[i].enumValue != (this->enumValue + i)) {
252                return FALSE;
253            }
254        }
255        return TRUE;
256    }
257};
258
259// Sort function for EnumToNameGroupEntry (sort by name index)
260U_CFUNC int32_t
261compareEnumToNameGroupEntry(const void * /*context*/, const void* e1, const void* e2) {
262    return ((EnumToNameGroupEntry*)e1)->enumValue - ((EnumToNameGroupEntry*)e2)->enumValue;
263}
264
265//----------------------------------------------------------------------
266
267/**
268 * An element in the map from enumerated property enums to value maps.
269 */
270class EnumToValueEntry {
271public:
272    int32_t enumValue;
273    EnumToNameGroupEntry* enumToName;
274    int32_t enumToName_count;
275    NameToEnumEntry* nameToEnum;
276    int32_t nameToEnum_count;
277
278    // are enumValues contiguous for count entries starting with this one?
279    // ***!!!*** we assume we are in an array and look at neighbors ***!!!***
280    UBool isContiguous(int32_t count) const {
281        const EnumToValueEntry* p = this;
282        for (int32_t i=1; i<count; ++i) {
283            if (p[i].enumValue != (this->enumValue + i)) {
284                return FALSE;
285            }
286        }
287        return TRUE;
288    }
289};
290
291// Sort function for EnumToValueEntry (sort by enum)
292U_CFUNC int32_t
293compareEnumToValueEntry(const void * /*context*/, const void* e1, const void* e2) {
294    return ((EnumToValueEntry*)e1)->enumValue - ((EnumToValueEntry*)e2)->enumValue;
295}
296
297//----------------------------------------------------------------------
298// BEGIN Builder
299
// True when x lies in the closed range [0, MAX_OFFSET].
// Note: x is evaluated twice.
#define IS_VALID_OFFSET(x) (!(((x)<0)||((x)>MAX_OFFSET)))
301
302class Builder {
303    // header:
304    PropertyAliases header;
305
306    // 0:
307    NonContiguousEnumToOffset* enumToName;
308    int32_t enumToName_size;
309    Offset enumToName_offset;
310
311    // 1: (deleted)
312
313    // 2:
314    NameToEnum* nameToEnum;
315    int32_t nameToEnum_size;
316    Offset nameToEnum_offset;
317
318    // 3:
319    NonContiguousEnumToOffset* enumToValue;
320    int32_t enumToValue_size;
321    Offset enumToValue_offset;
322
323    // 4:
324    ValueMap* valueMap;
325    int32_t valueMap_size;
326    int32_t valueMap_count;
327    Offset valueMap_offset;
328
329    // for any i, one of valueEnumToName[i] or valueNCEnumToName[i] is
330    // NULL and one is not.  valueEnumToName_size[i] is the size of
331    // the non-NULL one.  i=0..valueMapCount-1
332    // 5a:
333    EnumToOffset** valueEnumToName;
334    // 5b:
335    NonContiguousEnumToOffset** valueNCEnumToName;
336    int32_t* valueEnumToName_size;
337    Offset* valueEnumToName_offset;
338    // 6:
339    // arrays of valueMap_count pointers, sizes, & offsets
340    NameToEnum** valueNameToEnum;
341    int32_t* valueNameToEnum_size;
342    Offset* valueNameToEnum_offset;
343
344    // 98:
345    Offset* nameGroupPool;
346    int32_t nameGroupPool_count;
347    int32_t nameGroupPool_size;
348    Offset nameGroupPool_offset;
349
350    // 99:
351    char* stringPool;
352    int32_t stringPool_count;
353    int32_t stringPool_size;
354    Offset stringPool_offset;
355    Offset* stringPool_offsetArray; // relative to stringPool
356
357    int32_t total_size; // size of everything
358
359    int32_t debug;
360
361public:
362
363    Builder(int32_t debugLevel);
364    ~Builder();
365
366    void buildTopLevelProperties(const NameToEnumEntry* propName,
367                                 int32_t propNameCount,
368                                 const EnumToNameGroupEntry* propEnum,
369                                 int32_t propEnumCount);
370
371    void buildValues(const EnumToValueEntry* e2v,
372                     int32_t count);
373
374    void buildStringPool(const AliasName* propertyNames,
375                         int32_t propertyNameCount,
376                         const int32_t* nameGroupIndices,
377                         int32_t nameGroupIndicesCount);
378
379    void fixup();
380
381    int8_t* createData(int32_t& length) const;
382
383private:
384
385    static EnumToOffset* buildEnumToOffset(const EnumToNameGroupEntry* e2ng,
386                                           int32_t count,
387                                           int32_t& size);
388    static NonContiguousEnumToOffset*
389        buildNCEnumToNameGroup(const EnumToNameGroupEntry* e2ng,
390                               int32_t count,
391                               int32_t& size);
392
393    static NonContiguousEnumToOffset*
394        buildNCEnumToValue(const EnumToValueEntry* e2v,
395                           int32_t count,
396                           int32_t& size);
397
398    static NameToEnum* buildNameToEnum(const NameToEnumEntry* nameToEnum,
399                                       int32_t count,
400                                       int32_t& size);
401
402    Offset stringIndexToOffset(int32_t index, UBool allowNeg=FALSE) const;
403    void fixupNameToEnum(NameToEnum* n);
404    void fixupEnumToNameGroup(EnumToOffset* e2ng);
405    void fixupNCEnumToNameGroup(NonContiguousEnumToOffset* e2ng);
406
407    void computeOffsets();
408    void fixupStringPoolOffsets();
409    void fixupNameGroupPoolOffsets();
410    void fixupMiscellaneousOffsets();
411
412    static int32_t align(int32_t a);
413    static void erase(void* p, int32_t size);
414};
415
416Builder::Builder(int32_t debugLevel) {
417    debug = debugLevel;
418    enumToName = 0;
419    nameToEnum = 0;
420    enumToValue = 0;
421    valueMap_count = 0;
422    valueMap = 0;
423    valueEnumToName = 0;
424    valueNCEnumToName = 0;
425    valueEnumToName_size = 0;
426    valueEnumToName_offset = 0;
427    valueNameToEnum = 0;
428    valueNameToEnum_size = 0;
429    valueNameToEnum_offset = 0;
430    nameGroupPool = 0;
431    stringPool = 0;
432    stringPool_offsetArray = 0;
433}
434
435Builder::~Builder() {
436    uprv_free(enumToName);
437    uprv_free(nameToEnum);
438    uprv_free(enumToValue);
439    uprv_free(valueMap);
440    for (int32_t i=0; i<valueMap_count; ++i) {
441        uprv_free(valueEnumToName[i]);
442        uprv_free(valueNCEnumToName[i]);
443        uprv_free(valueNameToEnum[i]);
444    }
445    uprv_free(valueEnumToName);
446    uprv_free(valueNCEnumToName);
447    uprv_free(valueEnumToName_size);
448    uprv_free(valueEnumToName_offset);
449    uprv_free(valueNameToEnum);
450    uprv_free(valueNameToEnum_size);
451    uprv_free(valueNameToEnum_offset);
452    uprv_free(nameGroupPool);
453    uprv_free(stringPool);
454    uprv_free(stringPool_offsetArray);
455}
456
457int32_t Builder::align(int32_t a) {
458    U_ASSERT(a >= 0);
459    int32_t k = a % sizeof(int32_t);
460    if (k == 0) {
461        return a;
462    }
463    a += sizeof(int32_t) - k;
464    return a;
465}
466
467void Builder::erase(void* p, int32_t size) {
468    U_ASSERT(size >= 0);
469    int8_t* q = (int8_t*) p;
470    while (size--) {
471        *q++ = 0;
472    }
473}
474
475EnumToOffset* Builder::buildEnumToOffset(const EnumToNameGroupEntry* e2ng,
476                                         int32_t count,
477                                         int32_t& size) {
478    U_ASSERT(e2ng->isContiguous(count));
479    size = align(EnumToOffset::getSize(count));
480    EnumToOffset* result = (EnumToOffset*) uprv_malloc(size);
481    erase(result, size);
482    result->enumStart = e2ng->enumValue;
483    result->enumLimit = e2ng->enumValue + count;
484    Offset* p = result->getOffsetArray();
485    for (int32_t i=0; i<count; ++i) {
486        // set these to NGI index values
487        // fix them up to NGI offset values
488        U_ASSERT(IS_VALID_OFFSET(e2ng[i].nameGroupIndex));
489        p[i] = (Offset) e2ng[i].nameGroupIndex; // FIXUP later
490    }
491    return result;
492}
493
494NonContiguousEnumToOffset*
495Builder::buildNCEnumToNameGroup(const EnumToNameGroupEntry* e2ng,
496                                int32_t count,
497                                int32_t& size) {
498    U_ASSERT(!e2ng->isContiguous(count));
499    size = align(NonContiguousEnumToOffset::getSize(count));
500    NonContiguousEnumToOffset* nc = (NonContiguousEnumToOffset*) uprv_malloc(size);
501    erase(nc, size);
502    nc->count = count;
503    EnumValue* e = nc->getEnumArray();
504    Offset* p = nc->getOffsetArray();
505    for (int32_t i=0; i<count; ++i) {
506        // set these to NGI index values
507        // fix them up to NGI offset values
508        e[i] = e2ng[i].enumValue;
509        U_ASSERT(IS_VALID_OFFSET(e2ng[i].nameGroupIndex));
510        p[i] = (Offset) e2ng[i].nameGroupIndex; // FIXUP later
511    }
512    return nc;
513}
514
515NonContiguousEnumToOffset*
516Builder::buildNCEnumToValue(const EnumToValueEntry* e2v,
517                            int32_t count,
518                            int32_t& size) {
519    U_ASSERT(!e2v->isContiguous(count));
520    size = align(NonContiguousEnumToOffset::getSize(count));
521    NonContiguousEnumToOffset* result = (NonContiguousEnumToOffset*) uprv_malloc(size);
522    erase(result, size);
523    result->count = count;
524    EnumValue* e = result->getEnumArray();
525    for (int32_t i=0; i<count; ++i) {
526        e[i] = e2v[i].enumValue;
527        // offset must be set later
528    }
529    return result;
530}
531
532/**
533 * Given an index into the string pool, return an offset.  computeOffsets()
534 * must have been called already.  If allowNegative is true, allow negatives
535 * and preserve their sign.
536 */
537Offset Builder::stringIndexToOffset(int32_t index, UBool allowNegative) const {
538    // Index 0 is ""; we turn this into an Offset of zero
539    if (index == 0) return 0;
540    if (index < 0) {
541        if (allowNegative) {
542            return -Builder::stringIndexToOffset(-index);
543        } else {
544            die("Negative string pool index");
545        }
546    } else {
547        if (index >= stringPool_count) {
548            die("String pool index too large");
549        }
550        Offset result = stringPool_offset + stringPool_offsetArray[index];
551        U_ASSERT(result >= 0 && result < total_size);
552        return result;
553    }
554    return 0; // never executed; make compiler happy
555}
556
557NameToEnum* Builder::buildNameToEnum(const NameToEnumEntry* nameToEnum,
558                                     int32_t count,
559                                     int32_t& size) {
560    size = align(NameToEnum::getSize(count));
561    NameToEnum* n2e = (NameToEnum*) uprv_malloc(size);
562    erase(n2e, size);
563    n2e->count = count;
564    Offset* p = n2e->getNameArray();
565    EnumValue* e = n2e->getEnumArray();
566    for (int32_t i=0; i<count; ++i) {
567        // set these to SP index values
568        // fix them up to SP offset values
569        U_ASSERT(IS_VALID_OFFSET(nameToEnum[i].nameIndex));
570        p[i] = (Offset) nameToEnum[i].nameIndex; // FIXUP later
571        e[i] = nameToEnum[i].enumValue;
572    }
573    return n2e;
574}
575
576
577void Builder::buildTopLevelProperties(const NameToEnumEntry* propName,
578                                      int32_t propNameCount,
579                                      const EnumToNameGroupEntry* propEnum,
580                                      int32_t propEnumCount) {
581    enumToName = buildNCEnumToNameGroup(propEnum,
582                                        propEnumCount,
583                                        enumToName_size);
584    nameToEnum = buildNameToEnum(propName,
585                                 propNameCount,
586                                 nameToEnum_size);
587}
588
589void Builder::buildValues(const EnumToValueEntry* e2v,
590                          int32_t count) {
591    int32_t i;
592
593    U_ASSERT(!e2v->isContiguous(count));
594
595    valueMap_count = count;
596
597    enumToValue = buildNCEnumToValue(e2v, count,
598                                     enumToValue_size);
599
600    valueMap_size = align(count * sizeof(ValueMap));
601    valueMap = (ValueMap*) uprv_malloc(valueMap_size);
602    erase(valueMap, valueMap_size);
603
604    valueEnumToName = MALLOC(EnumToOffset*, count);
605    valueNCEnumToName = MALLOC(NonContiguousEnumToOffset*, count);
606    valueEnumToName_size = MALLOC(int32_t, count);
607    valueEnumToName_offset = MALLOC(Offset, count);
608    valueNameToEnum = MALLOC(NameToEnum*, count);
609    valueNameToEnum_size = MALLOC(int32_t, count);
610    valueNameToEnum_offset = MALLOC(Offset, count);
611
612    for (i=0; i<count; ++i) {
613        UBool isContiguous =
614            e2v[i].enumToName->isContiguous(e2v[i].enumToName_count);
615        valueEnumToName[i] = 0;
616        valueNCEnumToName[i] = 0;
617        if (isContiguous) {
618            valueEnumToName[i] = buildEnumToOffset(e2v[i].enumToName,
619                                                   e2v[i].enumToName_count,
620                                                   valueEnumToName_size[i]);
621        } else {
622            valueNCEnumToName[i] = buildNCEnumToNameGroup(e2v[i].enumToName,
623                                                          e2v[i].enumToName_count,
624                                                          valueEnumToName_size[i]);
625        }
626        valueNameToEnum[i] =
627            buildNameToEnum(e2v[i].nameToEnum,
628                            e2v[i].nameToEnum_count,
629                            valueNameToEnum_size[i]);
630    }
631}
632
633void Builder::buildStringPool(const AliasName* propertyNames,
634                              int32_t propertyNameCount,
635                              const int32_t* nameGroupIndices,
636                              int32_t nameGroupIndicesCount) {
637    int32_t i;
638
639    nameGroupPool_count = nameGroupIndicesCount;
640    nameGroupPool_size = sizeof(Offset) * nameGroupPool_count;
641    nameGroupPool = MALLOC(Offset, nameGroupPool_count);
642
643    for (i=0; i<nameGroupPool_count; ++i) {
644        // Some indices are negative.
645        int32_t a = nameGroupIndices[i];
646        if (a < 0) a = -a;
647        U_ASSERT(IS_VALID_OFFSET(a));
648        nameGroupPool[i] = (Offset) nameGroupIndices[i];
649    }
650
651    stringPool_count = propertyNameCount;
652    stringPool_size = 0;
653    // first string must be "" -- we skip it
654    U_ASSERT(*propertyNames[0].str == 0);
655    for (i=1 /*sic*/; i<propertyNameCount; ++i) {
656        stringPool_size += (int32_t)(uprv_strlen(propertyNames[i].str) + 1);
657    }
658    stringPool = MALLOC(char, stringPool_size);
659    stringPool_offsetArray = MALLOC(Offset, stringPool_count);
660    Offset soFar = 0;
661    char* p = stringPool;
662    stringPool_offsetArray[0] = -1; // we don't use this entry
663    for (i=1 /*sic*/; i<propertyNameCount; ++i) {
664        const char* str = propertyNames[i].str;
665        int32_t len = (int32_t)uprv_strlen(str);
666        uprv_strcpy(p, str);
667        p += len;
668        *p++ = 0;
669        stringPool_offsetArray[i] = soFar;
670        soFar += (Offset)(len+1);
671    }
672    U_ASSERT(soFar == stringPool_size);
673    U_ASSERT(p == (stringPool + stringPool_size));
674}
675
676// Confirm that PropertyAliases is a POD (plain old data; see C++
677// std).  The following union will _fail to compile_ if
678// PropertyAliases is _not_ a POD.  (Note: We used to use the offsetof
679// macro to check this, but that's not quite right, so that test is
680// commented out -- see below.)
681typedef union {
682    int32_t i;
683    PropertyAliases p;
684} PropertyAliasesPODTest;
685
/**
 * Assign every section its offset within the final data image.
 *
 * Sections are laid out one after another in this order: header,
 * enumToName (0), nameToEnum (2), enumToValue (3), valueMap (4), then
 * for each value map its enum=>name table (5) followed by its
 * name=>enum table (6), then the name group pool (98) and the string
 * pool (99).  On return each <section>_offset member holds the start
 * of that section and total_size holds the size of the whole image.
 * When debug > 0 the layout is printed to stdout.
 */
void Builder::computeOffsets() {
    int32_t i;
    // running offset; the first section starts right after the header
    Offset off = sizeof(header);

    if (debug>0) {
        printf("header   \t offset=%4d  size=%5d\n", 0, off);
    }

    // PropertyAliases must have no v-table and must be
    // padded (if necessary) to the next 32-bit boundary.
    //U_ASSERT(offsetof(PropertyAliases, enumToName_offset) == 0); // see above
    U_ASSERT(sizeof(header) % sizeof(int32_t) == 0);

    // COMPUTE_OFFSET(foo): section foo must start on an int32_t boundary.
    #define COMPUTE_OFFSET(foo) COMPUTE_OFFSET2(foo,int32_t)

    // COMPUTE_OFFSET2(foo,type): record the current offset as foo_offset,
    // assert that the section fits and is aligned for `type`, then
    // advance the running offset by foo_size.
    #define COMPUTE_OFFSET2(foo,type) \
      if (debug>0)\
        printf(#foo "\t offset=%4d  size=%5d\n", off, (int)foo##_size);\
      foo##_offset = off;\
      U_ASSERT(IS_VALID_OFFSET(off + foo##_size));\
      U_ASSERT(foo##_offset % sizeof(type) == 0);\
      off = (Offset) (off + foo##_size);

    COMPUTE_OFFSET(enumToName);     // 0:
    COMPUTE_OFFSET(nameToEnum);     // 2:
    COMPUTE_OFFSET(enumToValue);    // 3:
    COMPUTE_OFFSET(valueMap);       // 4:

    // The per-value-map tables are interleaved: table 5 of map i is
    // immediately followed by table 6 of map i.
    for (i=0; i<valueMap_count; ++i) {
        if (debug>0) {
            printf(" enumToName[%d]\t offset=%4d  size=%5d\n",
                   (int)i, off, (int)valueEnumToName_size[i]);
        }

        valueEnumToName_offset[i] = off;   // 5:
        U_ASSERT(IS_VALID_OFFSET(off + valueEnumToName_size[i]));
        off = (Offset) (off + valueEnumToName_size[i]);

        if (debug>0) {
            printf(" nameToEnum[%d]\t offset=%4d  size=%5d\n",
                   (int)i, off, (int)valueNameToEnum_size[i]);
        }

        valueNameToEnum_offset[i] = off;   // 6:
        U_ASSERT(IS_VALID_OFFSET(off + valueNameToEnum_size[i]));
        off = (Offset) (off + valueNameToEnum_size[i]);
    }

    // These last two chunks have weaker alignment needs
    COMPUTE_OFFSET2(nameGroupPool,Offset); // 98:
    COMPUTE_OFFSET2(stringPool,char);      // 99:

    // the offset just past the last section is the size of the image
    total_size = off;
    if (debug>0) printf("total                         size=%5d\n\n", (int)total_size);
    U_ASSERT(total_size <= (MAX_OFFSET+1));
}
742
743void Builder::fixupNameToEnum(NameToEnum* n) {
744    // Fix the string pool offsets in n
745    Offset* p = n->getNameArray();
746    for (int32_t i=0; i<n->count; ++i) {
747        p[i] = stringIndexToOffset(p[i]);
748    }
749}
750
751void Builder::fixupStringPoolOffsets() {
752    int32_t i;
753
754    // 2:
755    fixupNameToEnum(nameToEnum);
756
757    // 6:
758    for (i=0; i<valueMap_count; ++i) {
759        fixupNameToEnum(valueNameToEnum[i]);
760    }
761
762    // 98:
763    for (i=0; i<nameGroupPool_count; ++i) {
764        nameGroupPool[i] = stringIndexToOffset(nameGroupPool[i], TRUE);
765    }
766}
767
768void Builder::fixupEnumToNameGroup(EnumToOffset* e2ng) {
769    EnumValue i;
770    int32_t j;
771    Offset* p = e2ng->getOffsetArray();
772    for (i=e2ng->enumStart, j=0; i<e2ng->enumLimit; ++i, ++j) {
773        p[j] = nameGroupPool_offset + sizeof(Offset) * p[j];
774    }
775}
776
777void Builder::fixupNCEnumToNameGroup(NonContiguousEnumToOffset* e2ng) {
778    int32_t i;
779    /*EnumValue* e = e2ng->getEnumArray();*/
780    Offset* p = e2ng->getOffsetArray();
781    for (i=0; i<e2ng->count; ++i) {
782        p[i] = nameGroupPool_offset + sizeof(Offset) * p[i];
783    }
784}
785
786void Builder::fixupNameGroupPoolOffsets() {
787    int32_t i;
788
789    // 0:
790    fixupNCEnumToNameGroup(enumToName);
791
792    // 1: (deleted)
793
794    // 5:
795    for (i=0; i<valueMap_count; ++i) {
796        // 5a:
797        if (valueEnumToName[i] != 0) {
798            fixupEnumToNameGroup(valueEnumToName[i]);
799        }
800        // 5b:
801        if (valueNCEnumToName[i] != 0) {
802            fixupNCEnumToNameGroup(valueNCEnumToName[i]);
803        }
804    }
805}
806
807void Builder::fixupMiscellaneousOffsets() {
808    int32_t i;
809
810    // header:
811    erase(&header, sizeof(header));
812    header.enumToName_offset = enumToName_offset;
813    header.nameToEnum_offset = nameToEnum_offset;
814    header.enumToValue_offset = enumToValue_offset;
815    // header meta-info used by Java:
816    U_ASSERT(total_size > 0 && total_size < 0x7FFF);
817    header.total_size = (int16_t) total_size;
818    header.valueMap_offset = valueMap_offset;
819    header.valueMap_count = (int16_t) valueMap_count;
820    header.nameGroupPool_offset = nameGroupPool_offset;
821    header.nameGroupPool_count = (int16_t) nameGroupPool_count;
822    header.stringPool_offset = stringPool_offset;
823    header.stringPool_count = (int16_t) stringPool_count - 1; // don't include "" entry
824
825    U_ASSERT(valueMap_count <= 0x7FFF);
826    U_ASSERT(nameGroupPool_count <= 0x7FFF);
827    U_ASSERT(stringPool_count <= 0x7FFF);
828
829    // 3:
830    Offset* p = enumToValue->getOffsetArray();
831    /*EnumValue* e = enumToValue->getEnumArray();*/
832    U_ASSERT(valueMap_count == enumToValue->count);
833    for (i=0; i<valueMap_count; ++i) {
834        p[i] = (Offset)(valueMap_offset + sizeof(ValueMap) * i);
835    }
836
837    // 4:
838    for (i=0; i<valueMap_count; ++i) {
839        ValueMap& v = valueMap[i];
840        v.enumToName_offset = v.ncEnumToName_offset = 0;
841        if (valueEnumToName[i] != 0) {
842            v.enumToName_offset = valueEnumToName_offset[i];
843        }
844        if (valueNCEnumToName[i] != 0) {
845            v.ncEnumToName_offset = valueEnumToName_offset[i];
846        }
847        v.nameToEnum_offset = valueNameToEnum_offset[i];
848    }
849}
850
/**
 * Turn the indices stored by the build*() methods into offsets.
 * The order of the steps matters: computeOffsets() must run first
 * because the other steps read the section offsets it assigns.
 */
void Builder::fixup() {
    computeOffsets();               // assign section offsets and total_size
    fixupStringPoolOffsets();       // string indices => string pool offsets
    fixupNameGroupPoolOffsets();    // name group indices => pool offsets
    fixupMiscellaneousOffsets();    // header, value map offsets
}
857
858int8_t* Builder::createData(int32_t& length) const {
859    length = total_size;
860    int8_t* result = MALLOC(int8_t, length);
861
862    int8_t* p = result;
863    int8_t* limit = result + length;
864
865    #define APPEND2(x, size)   \
866      U_ASSERT((p+size)<=limit); \
867      uprv_memcpy(p, x, size); \
868      p += size
869
870    #define APPEND(x) APPEND2(x, x##_size)
871
872    APPEND2(&header, sizeof(header));
873    APPEND(enumToName);
874    APPEND(nameToEnum);
875    APPEND(enumToValue);
876    APPEND(valueMap);
877
878    for (int32_t i=0; i<valueMap_count; ++i) {
879        U_ASSERT((valueEnumToName[i] != 0 && valueNCEnumToName[i] == 0) ||
880               (valueEnumToName[i] == 0 && valueNCEnumToName[i] != 0));
881        if (valueEnumToName[i] != 0) {
882            APPEND2(valueEnumToName[i], valueEnumToName_size[i]);
883        }
884        if (valueNCEnumToName[i] != 0) {
885            APPEND2(valueNCEnumToName[i], valueEnumToName_size[i]);
886        }
887        APPEND2(valueNameToEnum[i], valueNameToEnum_size[i]);
888    }
889
890    APPEND(nameGroupPool);
891    APPEND(stringPool);
892
893    if (p != limit) {
894        fprintf(stderr, "p != limit; p = %p, limit = %p", p, limit);
895        exit(1);
896    }
897    return result;
898}
899
900// END Builder
901//----------------------------------------------------------------------
902
/* UDataInfo cf. udata.h */
/* Describes the generated data.  The field names in the comments      */
/* below follow the UDataInfo declaration in unicode/udata.h -- verify */
/* against that header if the struct changes.                          */
static UDataInfo dataInfo = {
    sizeof(UDataInfo),  /* size */
    0,                  /* reservedWord */

    U_IS_BIG_ENDIAN,    /* isBigEndian */
    U_CHARSET_FAMILY,   /* charsetFamily */
    sizeof(UChar),      /* sizeofUChar */
    0,                  /* reservedByte */

    {PNAME_SIG_0, PNAME_SIG_1, PNAME_SIG_2, PNAME_SIG_3}, /* dataFormat */
    {PNAME_FORMAT_VERSION, 0, 0, 0},                 /* formatVersion */
    {VERSION_0, VERSION_1, VERSION_2, VERSION_3} /* Unicode version */
};
917
/**
 * The application object of the genpname tool.  main() creates one
 * instance and hands the command line to MMain().
 */
class genpname {

    // command-line options
    // (no constructor initializes these; presumably MMain() sets them
    // while parsing the arguments -- its definition is not shown here)
    UBool useCopyright;
    UBool verbose;
    int32_t debug;

public:
    // Runs the tool; the result is used as the process exit status by main().
    int      MMain(int argc, char *argv[]);

private:
    // Builds a name => enum index for list, sorted by name; the number
    // of entries is returned through nameIndexCount.
    NameToEnumEntry* createNameIndex(const AliasList& list,
                                     int32_t& nameIndexCount);

    // Builds an enum => name group index for list.
    EnumToNameGroupEntry* createEnumIndex(const AliasList& list);

    // Writes the data produced by the given Builder to a file in destdir.
    int32_t  writeDataFile(const char *destdir, const Builder&);
};
936
937int main(int argc, char *argv[]) {
938    UErrorCode status = U_ZERO_ERROR;
939    u_init(&status);
940    if (U_FAILURE(status) && status != U_FILE_ACCESS_ERROR) {
941        // Note: u_init() will try to open ICU property data.
942        //       failures here are expected when building ICU from scratch.
943        //       ignore them.
944        fprintf(stderr, "genpname: can not initialize ICU.  Status = %s\n",
945            u_errorName(status));
946        exit(1);
947    }
948
949    genpname app;
950    U_MAIN_INIT_ARGS(argc, argv);
951    int retVal = app.MMain(argc, argv);
952    u_cleanup();
953    return retVal;
954}
955
// Command-line options accepted by the tool.
// NOTE(review): code that reads this table presumably addresses the
// entries by position, so keep the order stable -- confirm against
// MMain() before inserting or reordering entries.
static UOption options[]={
    UOPTION_HELP_H,                                 // [0]
    UOPTION_HELP_QUESTION_MARK,                     // [1]
    UOPTION_COPYRIGHT,                              // [2]
    UOPTION_DESTDIR,                                // [3]
    UOPTION_VERBOSE,                                // [4]
    UOPTION_DEF("debug", 'D', UOPT_REQUIRES_ARG),   // [5] "debug" / 'D', takes an argument
};
964
965NameToEnumEntry* genpname::createNameIndex(const AliasList& list,
966                                           int32_t& nameIndexCount) {
967
968    // Build name => enum map
969
970    // This is an n->1 map.  There are typically multiple names
971    // mapping to one enum.  The name index is sorted in order of the name,
972    // as defined by the uprv_compareAliasNames() function.
973
974    int32_t i, j;
975    int32_t count = list.count();
976
977    // compute upper limit on number of names in the index
978    int32_t nameIndexCapacity = count * MAX_NAMES_PER_GROUP;
979    NameToEnumEntry* nameIndex = MALLOC(NameToEnumEntry, nameIndexCapacity);
980
981    nameIndexCount = 0;
982    int32_t names[MAX_NAMES_PER_GROUP];
983    for (i=0; i<count; ++i) {
984        const Alias& p = list[i];
985        int32_t n = p.getUniqueNames(names);
986        for (j=0; j<n; ++j) {
987            U_ASSERT(nameIndexCount < nameIndexCapacity);
988            nameIndex[nameIndexCount++] =
989                NameToEnumEntry(names[j], p.enumValue);
990        }
991    }
992
993    /*
994     * use a stable sort to ensure consistent results between
995     * genpname.cpp and the propname.cpp swapping code
996     */
997    UErrorCode errorCode = U_ZERO_ERROR;
998    uprv_sortArray(nameIndex, nameIndexCount, sizeof(nameIndex[0]),
999                   compareNameToEnumEntry, NULL, TRUE, &errorCode);
1000    if (debug>1) {
1001        printf("Alias names: %d\n", (int)nameIndexCount);
1002        for (i=0; i<nameIndexCount; ++i) {
1003            printf("%s => %d\n",
1004                   STRING_TABLE[nameIndex[i].nameIndex].str,
1005                   (int)nameIndex[i].enumValue);
1006        }
1007        printf("\n");
1008    }
1009    // make sure there are no duplicates.  for a sorted list we need
1010    // only compare adjacent items.  Alias.getUniqueNames() has
1011    // already eliminated duplicate names for a single property, which
1012    // does occur, so we're checking for duplicate names between two
1013    // properties, which should never occur.
1014    UBool ok = TRUE;
1015    for (i=1; i<nameIndexCount; ++i) {
1016        if (STRING_TABLE[nameIndex[i-1].nameIndex] ==
1017            STRING_TABLE[nameIndex[i].nameIndex]) {
1018            printf("Error: Duplicate names in property list: \"%s\", \"%s\"\n",
1019                   STRING_TABLE[nameIndex[i-1].nameIndex].str,
1020                   STRING_TABLE[nameIndex[i].nameIndex].str);
1021            ok = FALSE;
1022        }
1023    }
1024    if (!ok) {
1025        die("Two or more duplicate names in property list");
1026    }
1027
1028    return nameIndex;
1029}
1030
1031EnumToNameGroupEntry* genpname::createEnumIndex(const AliasList& list) {
1032
1033    // Build the enum => name map
1034
1035    // This is a 1->n map.  Each enum maps to 1 or more names.  To
1036    // accomplish this the index entry points to an element of the
1037    // NAME_GROUP array.  This is the short name (which may be empty).
1038    // From there, subsequent elements of NAME_GROUP are alternate
1039    // names for this enum, up to and including the first one that is
1040    // negative (negate for actual index).
1041
1042    int32_t i, j, k;
1043    int32_t count = list.count();
1044
1045    EnumToNameGroupEntry* enumIndex = MALLOC(EnumToNameGroupEntry, count);
1046    for (i=0; i<count; ++i) {
1047        const Alias& p = list[i];
1048        enumIndex[i] = EnumToNameGroupEntry(p.enumValue, p.nameGroupIndex);
1049    }
1050
1051    UErrorCode errorCode = U_ZERO_ERROR;
1052    uprv_sortArray(enumIndex, count, sizeof(enumIndex[0]),
1053                   compareEnumToNameGroupEntry, NULL, FALSE, &errorCode);
1054    if (debug>1) {
1055        printf("Property enums: %d\n", (int)count);
1056        for (i=0; i<count; ++i) {
1057            printf("%d => %d: ",
1058                   (int)enumIndex[i].enumValue,
1059                   (int)enumIndex[i].nameGroupIndex);
1060            UBool done = FALSE;
1061            for (j=enumIndex[i].nameGroupIndex; !done; ++j) {
1062                k = NAME_GROUP[j];
1063                if (k < 0) {
1064                    k = -k;
1065                    done = TRUE;
1066                }
1067                printf("\"%s\"", STRING_TABLE[k].str);
1068                if (!done) printf(", ");
1069            }
1070            printf("\n");
1071        }
1072        printf("\n");
1073    }
1074    return enumIndex;
1075}
1076
1077int genpname::MMain(int argc, char* argv[])
1078{
1079    int32_t i, j;
1080    UErrorCode status = U_ZERO_ERROR;
1081
1082    u_init(&status);
1083    if (U_FAILURE(status) && status != U_FILE_ACCESS_ERROR) {
1084        fprintf(stderr, "Error: u_init returned %s\n", u_errorName(status));
1085        status = U_ZERO_ERROR;
1086    }
1087
1088
1089    /* preset then read command line options */
1090    options[3].value=u_getDataDirectory();
1091    argc=u_parseArgs(argc, argv, sizeof(options)/sizeof(options[0]), options);
1092
1093    /* error handling, printing usage message */
1094    if (argc<0) {
1095        fprintf(stderr,
1096            "error in command line argument \"%s\"\n",
1097            argv[-argc]);
1098    }
1099
1100    debug = options[5].doesOccur ? (*options[5].value - '0') : 0;
1101
1102    if (argc!=1 || options[0].doesOccur || options[1].doesOccur ||
1103       debug < 0 || debug > 9) {
1104        fprintf(stderr,
1105            "usage: %s [-options]\n"
1106            "\tcreate " PNAME_DATA_NAME "." PNAME_DATA_TYPE "\n"
1107            "options:\n"
1108            "\t-h or -? or --help  this usage text\n"
1109            "\t-v or --verbose     turn on verbose output\n"
1110            "\t-c or --copyright   include a copyright notice\n"
1111            "\t-d or --destdir     destination directory, followed by the path\n"
1112            "\t-D or --debug 0..9  emit debugging messages (if > 0)\n",
1113            argv[0]);
1114        return argc<0 ? U_ILLEGAL_ARGUMENT_ERROR : U_ZERO_ERROR;
1115    }
1116
1117    /* get the options values */
1118    useCopyright=options[2].doesOccur;
1119    verbose = options[4].doesOccur;
1120
1121    // ------------------------------------------------------------
1122    // Do not sort the string table, instead keep it in data.h order.
1123    // This simplifies data swapping and testing thereof because the string
1124    // table itself need not be sorted during swapping.
1125    // The NameToEnum sorter sorts each such map's string offsets instead.
1126
1127    if (debug>1) {
1128        printf("String pool: %d\n", (int)STRING_COUNT);
1129        for (i=0; i<STRING_COUNT; ++i) {
1130            if (i != 0) {
1131                printf(", ");
1132            }
1133            printf("%s (%d)", STRING_TABLE[i].str, (int)STRING_TABLE[i].index);
1134        }
1135        printf("\n\n");
1136    }
1137
1138    // ------------------------------------------------------------
1139    // Create top-level property indices
1140
1141    PropertyArrayList props(PROPERTY, PROPERTY_COUNT);
1142    int32_t propNameCount;
1143    NameToEnumEntry* propName = createNameIndex(props, propNameCount);
1144    EnumToNameGroupEntry* propEnum = createEnumIndex(props);
1145
1146    // ------------------------------------------------------------
1147    // Create indices for the value list for each enumerated property
1148
1149    // This will have more entries than we need...
1150    EnumToValueEntry* enumToValue = MALLOC(EnumToValueEntry, PROPERTY_COUNT);
1151    int32_t enumToValue_count = 0;
1152    for (i=0, j=0; i<PROPERTY_COUNT; ++i) {
1153        if (PROPERTY[i].valueCount == 0) continue;
1154        AliasArrayList values(PROPERTY[i].valueList,
1155                              PROPERTY[i].valueCount);
1156        enumToValue[j].enumValue = PROPERTY[i].enumValue;
1157        enumToValue[j].enumToName = createEnumIndex(values);
1158        enumToValue[j].enumToName_count = PROPERTY[i].valueCount;
1159        enumToValue[j].nameToEnum = createNameIndex(values,
1160                                                    enumToValue[j].nameToEnum_count);
1161        ++j;
1162    }
1163    enumToValue_count = j;
1164
1165    uprv_sortArray(enumToValue, enumToValue_count, sizeof(enumToValue[0]),
1166                   compareEnumToValueEntry, NULL, FALSE, &status);
1167
1168    // ------------------------------------------------------------
1169    // Build PropertyAliases layout in memory
1170
1171    Builder builder(debug);
1172
1173    builder.buildTopLevelProperties(propName,
1174                                    propNameCount,
1175                                    propEnum,
1176                                    PROPERTY_COUNT);
1177
1178    builder.buildValues(enumToValue,
1179                        enumToValue_count);
1180
1181    builder.buildStringPool(STRING_TABLE,
1182                            STRING_COUNT,
1183                            NAME_GROUP,
1184                            NAME_GROUP_COUNT);
1185
1186    builder.fixup();
1187
1188    ////////////////////////////////////////////////////////////
1189    // Write the output file
1190    ////////////////////////////////////////////////////////////
1191    int32_t wlen = writeDataFile(options[3].value, builder);
1192    if (verbose) {
1193        fprintf(stdout, "Output file: %s.%s, %ld bytes\n",
1194            U_ICUDATA_NAME "_" PNAME_DATA_NAME, PNAME_DATA_TYPE, (long)wlen);
1195    }
1196
1197    return 0; // success
1198}
1199
1200int32_t genpname::writeDataFile(const char *destdir, const Builder& builder) {
1201    int32_t length;
1202    int8_t* data = builder.createData(length);
1203
1204    UNewDataMemory *pdata;
1205    UErrorCode status = U_ZERO_ERROR;
1206
1207    pdata = udata_create(destdir, PNAME_DATA_TYPE, PNAME_DATA_NAME, &dataInfo,
1208                         useCopyright ? U_COPYRIGHT_STRING : 0, &status);
1209    if (U_FAILURE(status)) {
1210        die("Unable to create data memory");
1211    }
1212
1213    udata_writeBlock(pdata, data, length);
1214
1215    int32_t dataLength = (int32_t) udata_finish(pdata, &status);
1216    if (U_FAILURE(status)) {
1217        die("Error writing output file");
1218    }
1219    if (dataLength != length) {
1220        die("Written file doesn't match expected size");
1221    }
1222
1223    return dataLength;
1224}
1225
1226//eof
1227