1/*
2*******************************************************************************
3*
4*   Copyright (C) 1999-2013, International Business Machines
5*   Corporation and others.  All Rights Reserved.
6*
7*******************************************************************************
8*   file name:  package.cpp
9*   encoding:   US-ASCII
10*   tab size:   8 (not used)
11*   indentation:4
12*
13*   created on: 2005aug25
14*   created by: Markus W. Scherer
15*
16*   Read, modify, and write ICU .dat data package files.
17*   This is an integral part of the icupkg tool, moved to the toolutil library
18*   because parts of tool implementations tend to be later shared by
19*   other tools.
20*   Subsumes functionality and implementation code from
21*   gencmn, decmn, and icuswap tools.
22*/
23
24#include "unicode/utypes.h"
25#include "unicode/putil.h"
26#include "unicode/udata.h"
27#include "cstring.h"
28#include "uarrsort.h"
29#include "ucmndata.h"
30#include "udataswp.h"
31#include "swapimpl.h"
32#include "toolutil.h"
33#include "package.h"
34#include "cmemory.h"
35
36#include <stdio.h>
37#include <stdlib.h>
38#include <string.h>
39
40
/*
 * How much to grow the items array by each time it is enlarged.
 * NOTE(review): the code that uses this constant is not in this part of the
 * file; presumably the unit is Item entries -- confirm at the point of use.
 */
static const int32_t kItemsChunk = 256; /* How much to increase the filesarray by each time */

// general definitions ----------------------------------------------------- ***

/* Number of elements of a true array (not a pointer), as an int32_t. */
#define LENGTHOF(array) (int32_t)(sizeof(array)/sizeof((array)[0]))

/*
 * UDataInfo cf. udata.h
 * Template that the Package constructor copies into the header of an empty package.
 * The initializer is positional; the field names below follow udata.h.
 */
static const UDataInfo dataInfo={
    (uint16_t)sizeof(UDataInfo),  /* size */
    0,                            /* reserved */

    U_IS_BIG_ENDIAN,              /* isBigEndian: endianness of the build platform */
    U_CHARSET_FAMILY,             /* charsetFamily: charset family of the build platform */
    (uint8_t)sizeof(UChar),       /* sizeofUChar */
    0,                            /* reserved */

    {0x43, 0x6d, 0x6e, 0x44},     /* dataFormat="CmnD" */
    {1, 0, 0, 0},                 /* formatVersion */
    {3, 0, 0, 0}                  /* dataVersion */
};
61
62U_CDECL_BEGIN
63static void U_CALLCONV
64printPackageError(void *context, const char *fmt, va_list args) {
65    vfprintf((FILE *)context, fmt, args);
66}
67U_CDECL_END
68
/* Returns x with its two bytes exchanged. */
static uint16_t
readSwapUInt16(uint16_t x) {
    const uint16_t lowByte=(uint16_t)(x&0xff);
    const uint16_t highByte=(uint16_t)(x>>8);
    return (uint16_t)((lowByte<<8)|highByte);
}
73
74// platform types ---------------------------------------------------------- ***
75
/*
 * Platform type letters, indexed by the type enum below:
 * 'l', 'b', '?' and 'e' for TYPE_L, TYPE_B, TYPE_LE and TYPE_E respectively.
 */
static const char *types="lb?e";

/*
 * Type enum values. makeTypeEnum(charset, isBigEndian) computes them as
 * 2*charset+isBigEndian, so bit 0 carries the endianness.
 * TYPE_LE has no real letter ('?'); writePackage() treats it as bogus.
 */
enum { TYPE_L, TYPE_B, TYPE_LE, TYPE_E, TYPE_COUNT };
79
80static inline int32_t
81makeTypeEnum(uint8_t charset, UBool isBigEndian) {
82    return 2*(int32_t)charset+isBigEndian;
83}
84
85static inline int32_t
86makeTypeEnum(char type) {
87    return
88        type == 'l' ? TYPE_L :
89        type == 'b' ? TYPE_B :
90        type == 'e' ? TYPE_E :
91               -1;
92}
93
94static inline char
95makeTypeLetter(uint8_t charset, UBool isBigEndian) {
96    return types[makeTypeEnum(charset, isBigEndian)];
97}
98
99static inline char
100makeTypeLetter(int32_t typeEnum) {
101    return types[typeEnum];
102}
103
104static void
105makeTypeProps(char type, uint8_t &charset, UBool &isBigEndian) {
106    int32_t typeEnum=makeTypeEnum(type);
107    charset=(uint8_t)(typeEnum>>1);
108    isBigEndian=(UBool)(typeEnum&1);
109}
110
111U_CFUNC const UDataInfo *
112getDataInfo(const uint8_t *data, int32_t length,
113            int32_t &infoLength, int32_t &headerLength,
114            UErrorCode *pErrorCode) {
115    const DataHeader *pHeader;
116    const UDataInfo *pInfo;
117
118    if(pErrorCode==NULL || U_FAILURE(*pErrorCode)) {
119        return NULL;
120    }
121    if( data==NULL ||
122        (length>=0 && length<(int32_t)sizeof(DataHeader))
123    ) {
124        *pErrorCode=U_ILLEGAL_ARGUMENT_ERROR;
125        return NULL;
126    }
127
128    pHeader=(const DataHeader *)data;
129    pInfo=&pHeader->info;
130    if( (length>=0 && length<(int32_t)sizeof(DataHeader)) ||
131        pHeader->dataHeader.magic1!=0xda ||
132        pHeader->dataHeader.magic2!=0x27 ||
133        pInfo->sizeofUChar!=2
134    ) {
135        *pErrorCode=U_UNSUPPORTED_ERROR;
136        return NULL;
137    }
138
139    if(pInfo->isBigEndian==U_IS_BIG_ENDIAN) {
140        headerLength=pHeader->dataHeader.headerSize;
141        infoLength=pInfo->size;
142    } else {
143        headerLength=readSwapUInt16(pHeader->dataHeader.headerSize);
144        infoLength=readSwapUInt16(pInfo->size);
145    }
146
147    if( headerLength<(int32_t)sizeof(DataHeader) ||
148        infoLength<(int32_t)sizeof(UDataInfo) ||
149        headerLength<(int32_t)(sizeof(pHeader->dataHeader)+infoLength) ||
150        (length>=0 && length<headerLength)
151    ) {
152        *pErrorCode=U_UNSUPPORTED_ERROR;
153        return NULL;
154    }
155
156    return pInfo;
157}
158
159static int32_t
160getTypeEnumForInputData(const uint8_t *data, int32_t length,
161                        UErrorCode *pErrorCode) {
162    const UDataInfo *pInfo;
163    int32_t infoLength, headerLength;
164
165    /* getDataInfo() checks for illegal arguments */
166    pInfo=getDataInfo(data, length, infoLength, headerLength, pErrorCode);
167    if(pInfo==NULL) {
168        return -1;
169    }
170
171    return makeTypeEnum(pInfo->charsetFamily, (UBool)pInfo->isBigEndian);
172}
173
174// file handling ----------------------------------------------------------- ***
175
176static void
177extractPackageName(const char *filename,
178                   char pkg[], int32_t capacity) {
179    const char *basename;
180    int32_t len;
181
182    basename=findBasename(filename);
183    len=(int32_t)strlen(basename)-4; /* -4: subtract the length of ".dat" */
184
185    if(len<=0 || 0!=strcmp(basename+len, ".dat")) {
186        fprintf(stderr, "icupkg: \"%s\" is not recognized as a package filename (must end with .dat)\n",
187                         basename);
188        exit(U_ILLEGAL_ARGUMENT_ERROR);
189    }
190
191    if(len>=capacity) {
192        fprintf(stderr, "icupkg: the package name \"%s\" is too long (>=%ld)\n",
193                         basename, (long)capacity);
194        exit(U_ILLEGAL_ARGUMENT_ERROR);
195    }
196
197    memcpy(pkg, basename, len);
198    pkg[len]=0;
199}
200
/*
 * Return the length of the open file f in bytes and rewind it to the start.
 *
 * Returns -1 if seeking or ftell() fails, or if the size does not fit into
 * an int32_t (so an over-large file is never reported with a truncated,
 * bogus length).
 */
static int32_t
getFileLength(FILE *f) {
    long size;

    if(fseek(f, 0, SEEK_END)!=0) {
        return -1;
    }
    size=ftell(f);
    if(fseek(f, 0, SEEK_SET)!=0) {
        return -1;
    }
    if(size<0 || size>0x7fffffffL) {
        return -1;
    }
    return (int32_t)size;
}
210
/*
 * Replace, in place, every tree separator and every alternate file separator
 * in s with the normal file separator.
 * Compiles to nothing when all three separator characters are the same.
 */
#if U_TREE_ENTRY_SEP_CHAR==U_FILE_SEP_CHAR && U_FILE_ALT_SEP_CHAR==U_FILE_SEP_CHAR
#define treeToPath(s)
#else
static void
treeToPath(char *s) {
    while(*s!=0) {
        const char c=*s;
        if(c==U_TREE_ENTRY_SEP_CHAR || c==U_FILE_ALT_SEP_CHAR) {
            *s=U_FILE_SEP_CHAR;
        }
        ++s;
    }
}
#endif
228
/*
 * Replace, in place, every file separator and every alternate file separator
 * in s with the tree separator.
 * Compiles to nothing when all three separator characters are the same.
 */
#if U_TREE_ENTRY_SEP_CHAR==U_FILE_SEP_CHAR && U_FILE_ALT_SEP_CHAR==U_FILE_SEP_CHAR
#define pathToTree(s)
#else
static void
pathToTree(char *s) {
    while(*s!=0) {
        const char c=*s;
        if(c==U_FILE_SEP_CHAR || c==U_FILE_ALT_SEP_CHAR) {
            *s=U_TREE_ENTRY_SEP_CHAR;
        }
        ++s;
    }
}
#endif
246
247/*
248 * Prepend the path (if any) to the name and run the name through treeToName().
249 */
250static void
251makeFullFilename(const char *path, const char *name,
252                 char *filename, int32_t capacity) {
253    char *s;
254
255    // prepend the path unless NULL or empty
256    if(path!=NULL && path[0]!=0) {
257        if((int32_t)(strlen(path)+1)>=capacity) {
258            fprintf(stderr, "pathname too long: \"%s\"\n", path);
259            exit(U_BUFFER_OVERFLOW_ERROR);
260        }
261        strcpy(filename, path);
262
263        // make sure the path ends with a file separator
264        s=strchr(filename, 0);
265        if(*(s-1)!=U_FILE_SEP_CHAR && *(s-1)!=U_FILE_ALT_SEP_CHAR) {
266            *s++=U_FILE_SEP_CHAR;
267        }
268    } else {
269        s=filename;
270    }
271
272    // turn the name into a filename, turn tree separators into file separators
273    if((int32_t)((s-filename)+strlen(name))>=capacity) {
274        fprintf(stderr, "path/filename too long: \"%s%s\"\n", filename, name);
275        exit(U_BUFFER_OVERFLOW_ERROR);
276    }
277    strcpy(s, name);
278    treeToPath(s);
279}
280
281static void
282makeFullFilenameAndDirs(const char *path, const char *name,
283                        char *filename, int32_t capacity) {
284    char *sep;
285    UErrorCode errorCode;
286
287    makeFullFilename(path, name, filename, capacity);
288
289    // make tree directories
290    errorCode=U_ZERO_ERROR;
291    sep=strchr(filename, 0)-strlen(name);
292    while((sep=strchr(sep, U_FILE_SEP_CHAR))!=NULL) {
293        if(sep!=filename) {
294            *sep=0;                 // truncate temporarily
295            uprv_mkdir(filename, &errorCode);
296            if(U_FAILURE(errorCode)) {
297                fprintf(stderr, "icupkg: unable to create tree directory \"%s\"\n", filename);
298                exit(U_FILE_ACCESS_ERROR);
299            }
300        }
301        *sep++=U_FILE_SEP_CHAR; // restore file separator character
302    }
303}
304
305static uint8_t *
306readFile(const char *path, const char *name, int32_t &length, char &type) {
307    char filename[1024];
308    FILE *file;
309    uint8_t *data;
310    UErrorCode errorCode;
311    int32_t fileLength, typeEnum;
312
313    makeFullFilename(path, name, filename, (int32_t)sizeof(filename));
314
315    /* open the input file, get its length, allocate memory for it, read the file */
316    file=fopen(filename, "rb");
317    if(file==NULL) {
318        fprintf(stderr, "icupkg: unable to open input file \"%s\"\n", filename);
319        exit(U_FILE_ACCESS_ERROR);
320    }
321
322    /* get the file length */
323    fileLength=getFileLength(file);
324    if(ferror(file) || fileLength<=0) {
325        fprintf(stderr, "icupkg: empty input file \"%s\"\n", filename);
326        fclose(file);
327        exit(U_FILE_ACCESS_ERROR);
328    }
329
330    /* allocate the buffer, pad to multiple of 16 */
331    length=(fileLength+0xf)&~0xf;
332    data=(uint8_t *)uprv_malloc(length);
333    if(data==NULL) {
334        fclose(file);
335        fprintf(stderr, "icupkg: malloc error allocating %d bytes.\n", (int)length);
336        exit(U_MEMORY_ALLOCATION_ERROR);
337    }
338
339    /* read the file */
340    if(fileLength!=(int32_t)fread(data, 1, fileLength, file)) {
341        fprintf(stderr, "icupkg: error reading \"%s\"\n", filename);
342        fclose(file);
343        free(data);
344        exit(U_FILE_ACCESS_ERROR);
345    }
346
347    /* pad the file to a multiple of 16 using the usual padding byte */
348    if(fileLength<length) {
349        memset(data+fileLength, 0xaa, length-fileLength);
350    }
351
352    fclose(file);
353
354    // minimum check for ICU-format data
355    errorCode=U_ZERO_ERROR;
356    typeEnum=getTypeEnumForInputData(data, length, &errorCode);
357    if(typeEnum<0 || U_FAILURE(errorCode)) {
358        fprintf(stderr, "icupkg: not an ICU data file: \"%s\"\n", filename);
359        free(data);
360#if !UCONFIG_NO_LEGACY_CONVERSION
361        exit(U_INVALID_FORMAT_ERROR);
362#else
363        fprintf(stderr, "U_INVALID_FORMAT_ERROR occurred but UCONFIG_NO_LEGACY_CONVERSION is on so this is expected.\n");
364        exit(0);
365#endif
366    }
367    type=makeTypeLetter(typeEnum);
368
369    return data;
370}
371
372// .dat package file representation ---------------------------------------- ***
373
374U_CDECL_BEGIN
375
376static int32_t U_CALLCONV
377compareItems(const void * /*context*/, const void *left, const void *right) {
378    U_NAMESPACE_USE
379
380    return (int32_t)strcmp(((Item *)left)->name, ((Item *)right)->name);
381}
382
383U_CDECL_END
384
385U_NAMESPACE_BEGIN
386
387Package::Package()
388        : doAutoPrefix(FALSE), prefixEndsWithType(FALSE) {
389    inPkgName[0]=0;
390    pkgPrefix[0]=0;
391    inData=NULL;
392    inLength=0;
393    inCharset=U_CHARSET_FAMILY;
394    inIsBigEndian=U_IS_BIG_ENDIAN;
395
396    itemCount=0;
397    itemMax=0;
398    items=NULL;
399
400    inStringTop=outStringTop=0;
401
402    matchMode=0;
403    findPrefix=findSuffix=NULL;
404    findPrefixLength=findSuffixLength=0;
405    findNextIndex=-1;
406
407    // create a header for an empty package
408    DataHeader *pHeader;
409    pHeader=(DataHeader *)header;
410    pHeader->dataHeader.magic1=0xda;
411    pHeader->dataHeader.magic2=0x27;
412    memcpy(&pHeader->info, &dataInfo, sizeof(dataInfo));
413    headerLength=(int32_t)(4+sizeof(dataInfo));
414    if(headerLength&0xf) {
415        /* NUL-pad the header to a multiple of 16 */
416        int32_t length=(headerLength+0xf)&~0xf;
417        memset(header+headerLength, 0, length-headerLength);
418        headerLength=length;
419    }
420    pHeader->dataHeader.headerSize=(uint16_t)headerLength;
421}
422
423Package::~Package() {
424    int32_t idx;
425
426    free(inData);
427
428    for(idx=0; idx<itemCount; ++idx) {
429        if(items[idx].isDataOwned) {
430            free(items[idx].data);
431        }
432    }
433
434    uprv_free((void*)items);
435}
436
437void
438Package::setPrefix(const char *p) {
439    if(strlen(p)>=sizeof(pkgPrefix)) {
440        fprintf(stderr, "icupkg: --toc_prefix %s too long\n", p);
441        exit(U_ILLEGAL_ARGUMENT_ERROR);
442    }
443    strcpy(pkgPrefix, p);
444}
445
446void
447Package::readPackage(const char *filename) {
448    UDataSwapper *ds;
449    const UDataInfo *pInfo;
450    UErrorCode errorCode;
451
452    const uint8_t *inBytes;
453
454    int32_t length, offset, i;
455    int32_t itemLength, typeEnum;
456    char type;
457
458    const UDataOffsetTOCEntry *inEntries;
459
460    extractPackageName(filename, inPkgName, (int32_t)sizeof(inPkgName));
461
462    /* read the file */
463    inData=readFile(NULL, filename, inLength, type);
464    length=inLength;
465
466    /*
467     * swap the header - even if the swapping itself is a no-op
468     * because it tells us the header length
469     */
470    errorCode=U_ZERO_ERROR;
471    makeTypeProps(type, inCharset, inIsBigEndian);
472    ds=udata_openSwapper(inIsBigEndian, inCharset, U_IS_BIG_ENDIAN, U_CHARSET_FAMILY, &errorCode);
473    if(U_FAILURE(errorCode)) {
474        fprintf(stderr, "icupkg: udata_openSwapper(\"%s\") failed - %s\n",
475                filename, u_errorName(errorCode));
476        exit(errorCode);
477    }
478
479    ds->printError=printPackageError;
480    ds->printErrorContext=stderr;
481
482    headerLength=sizeof(header);
483    if(length<headerLength) {
484        headerLength=length;
485    }
486    headerLength=udata_swapDataHeader(ds, inData, headerLength, header, &errorCode);
487    if(U_FAILURE(errorCode)) {
488        exit(errorCode);
489    }
490
491    /* check data format and format version */
492    pInfo=(const UDataInfo *)((const char *)inData+4);
493    if(!(
494        pInfo->dataFormat[0]==0x43 &&   /* dataFormat="CmnD" */
495        pInfo->dataFormat[1]==0x6d &&
496        pInfo->dataFormat[2]==0x6e &&
497        pInfo->dataFormat[3]==0x44 &&
498        pInfo->formatVersion[0]==1
499    )) {
500        fprintf(stderr, "icupkg: data format %02x.%02x.%02x.%02x (format version %02x) is not recognized as an ICU .dat package\n",
501                pInfo->dataFormat[0], pInfo->dataFormat[1],
502                pInfo->dataFormat[2], pInfo->dataFormat[3],
503                pInfo->formatVersion[0]);
504        exit(U_UNSUPPORTED_ERROR);
505    }
506    inIsBigEndian=(UBool)pInfo->isBigEndian;
507    inCharset=pInfo->charsetFamily;
508
509    inBytes=(const uint8_t *)inData+headerLength;
510    inEntries=(const UDataOffsetTOCEntry *)(inBytes+4);
511
512    /* check that the itemCount fits, then the ToC table, then at least the header of the last item */
513    length-=headerLength;
514    if(length<4) {
515        /* itemCount does not fit */
516        offset=0x7fffffff;
517    } else {
518        itemCount=udata_readInt32(ds, *(const int32_t *)inBytes);
519        setItemCapacity(itemCount); /* resize so there's space */
520        if(itemCount==0) {
521            offset=4;
522        } else if(length<(4+8*itemCount)) {
523            /* ToC table does not fit */
524            offset=0x7fffffff;
525        } else {
526            /* offset of the last item plus at least 20 bytes for its header */
527            offset=20+(int32_t)ds->readUInt32(inEntries[itemCount-1].dataOffset);
528        }
529    }
530    if(length<offset) {
531        fprintf(stderr, "icupkg: too few bytes (%ld after header) for a .dat package\n",
532                        (long)length);
533        exit(U_INDEX_OUTOFBOUNDS_ERROR);
534    }
535    /* do not modify the package length variable until the last item's length is set */
536
537    if(itemCount<=0) {
538        if(doAutoPrefix) {
539            fprintf(stderr, "icupkg: --auto_toc_prefix[_with_type] but the input package is empty\n");
540            exit(U_INVALID_FORMAT_ERROR);
541        }
542    } else {
543        char prefix[MAX_PKG_NAME_LENGTH+4];
544        char *s, *inItemStrings;
545
546        if(itemCount>itemMax) {
547            fprintf(stderr, "icupkg: too many items, maximum is %d\n", itemMax);
548            exit(U_BUFFER_OVERFLOW_ERROR);
549        }
550
551        /* swap the item name strings */
552        int32_t stringsOffset=4+8*itemCount;
553        itemLength=(int32_t)(ds->readUInt32(inEntries[0].dataOffset))-stringsOffset;
554
555        // don't include padding bytes at the end of the item names
556        while(itemLength>0 && inBytes[stringsOffset+itemLength-1]!=0) {
557            --itemLength;
558        }
559
560        if((inStringTop+itemLength)>STRING_STORE_SIZE) {
561            fprintf(stderr, "icupkg: total length of item name strings too long\n");
562            exit(U_BUFFER_OVERFLOW_ERROR);
563        }
564
565        inItemStrings=inStrings+inStringTop;
566        ds->swapInvChars(ds, inBytes+stringsOffset, itemLength, inItemStrings, &errorCode);
567        if(U_FAILURE(errorCode)) {
568            fprintf(stderr, "icupkg failed to swap the input .dat package item name strings\n");
569            exit(U_INVALID_FORMAT_ERROR);
570        }
571        inStringTop+=itemLength;
572
573        // reset the Item entries
574        memset(items, 0, itemCount*sizeof(Item));
575
576        /*
577         * Get the common prefix of the items.
578         * New-style ICU .dat packages use tree separators ('/') between package names,
579         * tree names, and item names,
580         * while old-style ICU .dat packages (before multi-tree support)
581         * use an underscore ('_') between package and item names.
582         */
583        offset=(int32_t)ds->readUInt32(inEntries[0].nameOffset)-stringsOffset;
584        s=inItemStrings+offset;  // name of the first entry
585        int32_t prefixLength;
586        if(doAutoPrefix) {
587            // Use the first entry's prefix. Must be a new-style package.
588            const char *prefixLimit=strchr(s, U_TREE_ENTRY_SEP_CHAR);
589            if(prefixLimit==NULL) {
590                fprintf(stderr,
591                        "icupkg: --auto_toc_prefix[_with_type] but "
592                        "the first entry \"%s\" does not contain a '%c'\n",
593                        s, U_TREE_ENTRY_SEP_CHAR);
594                exit(U_INVALID_FORMAT_ERROR);
595            }
596            prefixLength=(int32_t)(prefixLimit-s);
597            if(prefixLength==0 || prefixLength>=LENGTHOF(pkgPrefix)) {
598                fprintf(stderr,
599                        "icupkg: --auto_toc_prefix[_with_type] but "
600                        "the prefix of the first entry \"%s\" is empty or too long\n",
601                        s);
602                exit(U_INVALID_FORMAT_ERROR);
603            }
604            if(prefixEndsWithType && s[prefixLength-1]!=type) {
605                fprintf(stderr,
606                        "icupkg: --auto_toc_prefix_with_type but "
607                        "the prefix of the first entry \"%s\" does not end with '%c'\n",
608                        s, type);
609                exit(U_INVALID_FORMAT_ERROR);
610            }
611            memcpy(pkgPrefix, s, prefixLength);
612            pkgPrefix[prefixLength]=0;
613            memcpy(prefix, s, ++prefixLength);  // include the /
614        } else {
615            // Use the package basename as prefix.
616            int32_t inPkgNameLength=strlen(inPkgName);
617            memcpy(prefix, inPkgName, inPkgNameLength);
618            prefixLength=inPkgNameLength;
619
620            if( (int32_t)strlen(s)>=(inPkgNameLength+2) &&
621                0==memcmp(s, inPkgName, inPkgNameLength) &&
622                s[inPkgNameLength]=='_'
623            ) {
624                // old-style .dat package
625                prefix[prefixLength++]='_';
626            } else {
627                // new-style .dat package
628                prefix[prefixLength++]=U_TREE_ENTRY_SEP_CHAR;
629                // if it turns out to not contain U_TREE_ENTRY_SEP_CHAR
630                // then the test in the loop below will fail
631            }
632        }
633        prefix[prefixLength]=0;
634
635        /* read the ToC table */
636        for(i=0; i<itemCount; ++i) {
637            // skip the package part of the item name, error if it does not match the actual package name
638            // or if nothing follows the package name
639            offset=(int32_t)ds->readUInt32(inEntries[i].nameOffset)-stringsOffset;
640            s=inItemStrings+offset;
641            if(0!=strncmp(s, prefix, prefixLength) || s[prefixLength]==0) {
642                fprintf(stderr, "icupkg: input .dat item name \"%s\" does not start with \"%s\"\n",
643                        s, prefix);
644                exit(U_INVALID_FORMAT_ERROR);
645            }
646            items[i].name=s+prefixLength;
647
648            // set the item's data
649            items[i].data=(uint8_t *)inBytes+ds->readUInt32(inEntries[i].dataOffset);
650            if(i>0) {
651                items[i-1].length=(int32_t)(items[i].data-items[i-1].data);
652
653                // set the previous item's platform type
654                typeEnum=getTypeEnumForInputData(items[i-1].data, items[i-1].length, &errorCode);
655                if(typeEnum<0 || U_FAILURE(errorCode)) {
656                    fprintf(stderr, "icupkg: not an ICU data file: item \"%s\" in \"%s\"\n", items[i-1].name, filename);
657                    exit(U_INVALID_FORMAT_ERROR);
658                }
659                items[i-1].type=makeTypeLetter(typeEnum);
660            }
661            items[i].isDataOwned=FALSE;
662        }
663        // set the last item's length
664        items[itemCount-1].length=length-ds->readUInt32(inEntries[itemCount-1].dataOffset);
665
666        // set the last item's platform type
667        typeEnum=getTypeEnumForInputData(items[itemCount-1].data, items[itemCount-1].length, &errorCode);
668        if(typeEnum<0 || U_FAILURE(errorCode)) {
669            fprintf(stderr, "icupkg: not an ICU data file: item \"%s\" in \"%s\"\n", items[i-1].name, filename);
670            exit(U_INVALID_FORMAT_ERROR);
671        }
672        items[itemCount-1].type=makeTypeLetter(typeEnum);
673
674        if(type!=U_ICUDATA_TYPE_LETTER[0]) {
675            // sort the item names for the local charset
676            sortItems();
677        }
678    }
679
680    udata_closeSwapper(ds);
681}
682
683char
684Package::getInType() {
685    return makeTypeLetter(inCharset, inIsBigEndian);
686}
687
688void
689Package::writePackage(const char *filename, char outType, const char *comment) {
690    char prefix[MAX_PKG_NAME_LENGTH+4];
691    UDataOffsetTOCEntry entry;
692    UDataSwapper *dsLocalToOut, *ds[TYPE_COUNT];
693    FILE *file;
694    Item *pItem;
695    char *name;
696    UErrorCode errorCode;
697    int32_t i, length, prefixLength, maxItemLength, basenameOffset, offset, outInt32;
698    uint8_t outCharset;
699    UBool outIsBigEndian;
700
701    extractPackageName(filename, prefix, MAX_PKG_NAME_LENGTH);
702
703    // if there is an explicit comment, then use it, else use what's in the current header
704    if(comment!=NULL) {
705        /* get the header size minus the current comment */
706        DataHeader *pHeader;
707        int32_t length;
708
709        pHeader=(DataHeader *)header;
710        headerLength=4+pHeader->info.size;
711        length=(int32_t)strlen(comment);
712        if((int32_t)(headerLength+length)>=(int32_t)sizeof(header)) {
713            fprintf(stderr, "icupkg: comment too long\n");
714            exit(U_BUFFER_OVERFLOW_ERROR);
715        }
716        memcpy(header+headerLength, comment, length+1);
717        headerLength+=length;
718        if(headerLength&0xf) {
719            /* NUL-pad the header to a multiple of 16 */
720            length=(headerLength+0xf)&~0xf;
721            memset(header+headerLength, 0, length-headerLength);
722            headerLength=length;
723        }
724        pHeader->dataHeader.headerSize=(uint16_t)headerLength;
725    }
726
727    makeTypeProps(outType, outCharset, outIsBigEndian);
728
729    // open (TYPE_COUNT-2) swappers
730    // one is a no-op for local type==outType
731    // one type (TYPE_LE) is bogus
732    errorCode=U_ZERO_ERROR;
733    i=makeTypeEnum(outType);
734    ds[TYPE_B]= i==TYPE_B ? NULL : udata_openSwapper(TRUE, U_ASCII_FAMILY, outIsBigEndian, outCharset, &errorCode);
735    ds[TYPE_L]= i==TYPE_L ? NULL : udata_openSwapper(FALSE, U_ASCII_FAMILY, outIsBigEndian, outCharset, &errorCode);
736    ds[TYPE_LE]=NULL;
737    ds[TYPE_E]= i==TYPE_E ? NULL : udata_openSwapper(TRUE, U_EBCDIC_FAMILY, outIsBigEndian, outCharset, &errorCode);
738    if(U_FAILURE(errorCode)) {
739        fprintf(stderr, "icupkg: udata_openSwapper() failed - %s\n", u_errorName(errorCode));
740        exit(errorCode);
741    }
742    for(i=0; i<TYPE_COUNT; ++i) {
743        if(ds[i]!=NULL) {
744            ds[i]->printError=printPackageError;
745            ds[i]->printErrorContext=stderr;
746        }
747    }
748
749    dsLocalToOut=ds[makeTypeEnum(U_CHARSET_FAMILY, U_IS_BIG_ENDIAN)];
750
751    // create the file and write its contents
752    file=fopen(filename, "wb");
753    if(file==NULL) {
754        fprintf(stderr, "icupkg: unable to create file \"%s\"\n", filename);
755        exit(U_FILE_ACCESS_ERROR);
756    }
757
758    // swap and write the header
759    if(dsLocalToOut!=NULL) {
760        udata_swapDataHeader(dsLocalToOut, header, headerLength, header, &errorCode);
761        if(U_FAILURE(errorCode)) {
762            fprintf(stderr, "icupkg: udata_swapDataHeader(local to out) failed - %s\n", u_errorName(errorCode));
763            exit(errorCode);
764        }
765    }
766    length=(int32_t)fwrite(header, 1, headerLength, file);
767    if(length!=headerLength) {
768        fprintf(stderr, "icupkg: unable to write complete header to file \"%s\"\n", filename);
769        exit(U_FILE_ACCESS_ERROR);
770    }
771
772    // prepare and swap the package name with a tree separator
773    // for prepending to item names
774    if(pkgPrefix[0]==0) {
775        prefixLength=(int32_t)strlen(prefix);
776    } else {
777        prefixLength=(int32_t)strlen(pkgPrefix);
778        memcpy(prefix, pkgPrefix, prefixLength);
779        if(prefixEndsWithType) {
780            prefix[prefixLength-1]=outType;
781        }
782    }
783    prefix[prefixLength++]=U_TREE_ENTRY_SEP_CHAR;
784    prefix[prefixLength]=0;
785    if(dsLocalToOut!=NULL) {
786        dsLocalToOut->swapInvChars(dsLocalToOut, prefix, prefixLength, prefix, &errorCode);
787        if(U_FAILURE(errorCode)) {
788            fprintf(stderr, "icupkg: swapInvChars(output package name) failed - %s\n", u_errorName(errorCode));
789            exit(errorCode);
790        }
791
792        // swap and sort the item names (sorting needs to be done in the output charset)
793        dsLocalToOut->swapInvChars(dsLocalToOut, inStrings, inStringTop, inStrings, &errorCode);
794        if(U_FAILURE(errorCode)) {
795            fprintf(stderr, "icupkg: swapInvChars(item names) failed - %s\n", u_errorName(errorCode));
796            exit(errorCode);
797        }
798        sortItems();
799    }
800
801    // create the output item names in sorted order, with the package name prepended to each
802    for(i=0; i<itemCount; ++i) {
803        length=(int32_t)strlen(items[i].name);
804        name=allocString(FALSE, length+prefixLength);
805        memcpy(name, prefix, prefixLength);
806        memcpy(name+prefixLength, items[i].name, length+1);
807        items[i].name=name;
808    }
809
810    // calculate offsets for item names and items, pad to 16-align items
811    // align only the first item; each item's length is a multiple of 16
812    basenameOffset=4+8*itemCount;
813    offset=basenameOffset+outStringTop;
814    if((length=(offset&15))!=0) {
815        length=16-length;
816        memset(allocString(FALSE, length-1), 0xaa, length);
817        offset+=length;
818    }
819
820    // write the table of contents
821    // first the itemCount
822    outInt32=itemCount;
823    if(dsLocalToOut!=NULL) {
824        dsLocalToOut->swapArray32(dsLocalToOut, &outInt32, 4, &outInt32, &errorCode);
825        if(U_FAILURE(errorCode)) {
826            fprintf(stderr, "icupkg: swapArray32(item count) failed - %s\n", u_errorName(errorCode));
827            exit(errorCode);
828        }
829    }
830    length=(int32_t)fwrite(&outInt32, 1, 4, file);
831    if(length!=4) {
832        fprintf(stderr, "icupkg: unable to write complete item count to file \"%s\"\n", filename);
833        exit(U_FILE_ACCESS_ERROR);
834    }
835
836    // then write the item entries (and collect the maxItemLength)
837    maxItemLength=0;
838    for(i=0; i<itemCount; ++i) {
839        entry.nameOffset=(uint32_t)(basenameOffset+(items[i].name-outStrings));
840        entry.dataOffset=(uint32_t)offset;
841        if(dsLocalToOut!=NULL) {
842            dsLocalToOut->swapArray32(dsLocalToOut, &entry, 8, &entry, &errorCode);
843            if(U_FAILURE(errorCode)) {
844                fprintf(stderr, "icupkg: swapArray32(item entry %ld) failed - %s\n", (long)i, u_errorName(errorCode));
845                exit(errorCode);
846            }
847        }
848        length=(int32_t)fwrite(&entry, 1, 8, file);
849        if(length!=8) {
850            fprintf(stderr, "icupkg: unable to write complete item entry %ld to file \"%s\"\n", (long)i, filename);
851            exit(U_FILE_ACCESS_ERROR);
852        }
853
854        length=items[i].length;
855        if(length>maxItemLength) {
856            maxItemLength=length;
857        }
858        offset+=length;
859    }
860
861    // write the item names
862    length=(int32_t)fwrite(outStrings, 1, outStringTop, file);
863    if(length!=outStringTop) {
864        fprintf(stderr, "icupkg: unable to write complete item names to file \"%s\"\n", filename);
865        exit(U_FILE_ACCESS_ERROR);
866    }
867
868    // write the items
869    for(pItem=items, i=0; i<itemCount; ++pItem, ++i) {
870        int32_t type=makeTypeEnum(pItem->type);
871        if(ds[type]!=NULL) {
872            // swap each item from its platform properties to the desired ones
873            udata_swap(
874                ds[type],
875                pItem->data, pItem->length, pItem->data,
876                &errorCode);
877            if(U_FAILURE(errorCode)) {
878                fprintf(stderr, "icupkg: udata_swap(item %ld) failed - %s\n", (long)i, u_errorName(errorCode));
879                exit(errorCode);
880            }
881        }
882        length=(int32_t)fwrite(pItem->data, 1, pItem->length, file);
883        if(length!=pItem->length) {
884            fprintf(stderr, "icupkg: unable to write complete item %ld to file \"%s\"\n", (long)i, filename);
885            exit(U_FILE_ACCESS_ERROR);
886        }
887    }
888
889    if(ferror(file)) {
890        fprintf(stderr, "icupkg: unable to write complete file \"%s\"\n", filename);
891        exit(U_FILE_ACCESS_ERROR);
892    }
893
894    fclose(file);
895    for(i=0; i<TYPE_COUNT; ++i) {
896        udata_closeSwapper(ds[i]);
897    }
898}
899
900int32_t
901Package::findItem(const char *name, int32_t length) const {
902    int32_t i, start, limit;
903    int result;
904
905    /* do a binary search for the string */
906    start=0;
907    limit=itemCount;
908    while(start<limit) {
909        i=(start+limit)/2;
910        if(length>=0) {
911            result=strncmp(name, items[i].name, length);
912        } else {
913            result=strcmp(name, items[i].name);
914        }
915
916        if(result==0) {
917            /* found */
918            if(length>=0) {
919                /*
920                 * if we compared just prefixes, then we may need to back up
921                 * to the first item with this prefix
922                 */
923                while(i>0 && 0==strncmp(name, items[i-1].name, length)) {
924                    --i;
925                }
926            }
927            return i;
928        } else if(result<0) {
929            limit=i;
930        } else /* result>0 */ {
931            start=i+1;
932        }
933    }
934
935    return ~start; /* not found, return binary-not of the insertion point */
936}
937
938void
939Package::findItems(const char *pattern) {
940    const char *wild;
941
942    if(pattern==NULL || *pattern==0) {
943        findNextIndex=-1;
944        return;
945    }
946
947    findPrefix=pattern;
948    findSuffix=NULL;
949    findSuffixLength=0;
950
951    wild=strchr(pattern, '*');
952    if(wild==NULL) {
953        // no wildcard
954        findPrefixLength=(int32_t)strlen(pattern);
955    } else {
956        // one wildcard
957        findPrefixLength=(int32_t)(wild-pattern);
958        findSuffix=wild+1;
959        findSuffixLength=(int32_t)strlen(findSuffix);
960        if(NULL!=strchr(findSuffix, '*')) {
961            // two or more wildcards
962            fprintf(stderr, "icupkg: syntax error (more than one '*') in item pattern \"%s\"\n", pattern);
963            exit(U_PARSE_ERROR);
964        }
965    }
966
967    if(findPrefixLength==0) {
968        findNextIndex=0;
969    } else {
970        findNextIndex=findItem(findPrefix, findPrefixLength);
971    }
972}
973
974int32_t
975Package::findNextItem() {
976    const char *name, *middle, *treeSep;
977    int32_t idx, nameLength, middleLength;
978
979    if(findNextIndex<0) {
980        return -1;
981    }
982
983    while(findNextIndex<itemCount) {
984        idx=findNextIndex++;
985        name=items[idx].name;
986        nameLength=(int32_t)strlen(name);
987        if(nameLength<(findPrefixLength+findSuffixLength)) {
988            // item name too short for prefix & suffix
989            continue;
990        }
991        if(findPrefixLength>0 && 0!=memcmp(findPrefix, name, findPrefixLength)) {
992            // left the range of names with this prefix
993            break;
994        }
995        middle=name+findPrefixLength;
996        middleLength=nameLength-findPrefixLength-findSuffixLength;
997        if(findSuffixLength>0 && 0!=memcmp(findSuffix, name+(nameLength-findSuffixLength), findSuffixLength)) {
998            // suffix does not match
999            continue;
1000        }
1001        // prefix & suffix match
1002
1003        if(matchMode&MATCH_NOSLASH) {
1004            treeSep=strchr(middle, U_TREE_ENTRY_SEP_CHAR);
1005            if(treeSep!=NULL && (treeSep-middle)<middleLength) {
1006                // the middle (matching the * wildcard) contains a tree separator /
1007                continue;
1008            }
1009        }
1010
1011        // found a matching item
1012        return idx;
1013    }
1014
1015    // no more items
1016    findNextIndex=-1;
1017    return -1;
1018}
1019
1020void
1021Package::setMatchMode(uint32_t mode) {
1022    matchMode=mode;
1023}
1024
1025void
1026Package::addItem(const char *name) {
1027    addItem(name, NULL, 0, FALSE, U_ICUDATA_TYPE_LETTER[0]);
1028}
1029
1030void
1031Package::addItem(const char *name, uint8_t *data, int32_t length, UBool isDataOwned, char type) {
1032    int32_t idx;
1033
1034    idx=findItem(name);
1035    if(idx<0) {
1036        // new item, make space at the insertion point
1037        ensureItemCapacity();
1038        // move the following items down
1039        idx=~idx;
1040        if(idx<itemCount) {
1041            memmove(items+idx+1, items+idx, (itemCount-idx)*sizeof(Item));
1042        }
1043        ++itemCount;
1044
1045        // reset this Item entry
1046        memset(items+idx, 0, sizeof(Item));
1047
1048        // copy the item's name
1049        items[idx].name=allocString(TRUE, strlen(name));
1050        strcpy(items[idx].name, name);
1051        pathToTree(items[idx].name);
1052    } else {
1053        // same-name item found, replace it
1054        if(items[idx].isDataOwned) {
1055            free(items[idx].data);
1056        }
1057
1058        // keep the item's name since it is the same
1059    }
1060
1061    // set the item's data
1062    items[idx].data=data;
1063    items[idx].length=length;
1064    items[idx].isDataOwned=isDataOwned;
1065    items[idx].type=type;
1066}
1067
1068void
1069Package::addFile(const char *filesPath, const char *name) {
1070    uint8_t *data;
1071    int32_t length;
1072    char type;
1073
1074    data=readFile(filesPath, name, length, type);
1075    // readFile() exits the tool if it fails
1076    addItem(name, data, length, TRUE, type);
1077}
1078
1079void
1080Package::addItems(const Package &listPkg) {
1081    const Item *pItem;
1082    int32_t i;
1083
1084    for(pItem=listPkg.items, i=0; i<listPkg.itemCount; ++pItem, ++i) {
1085        addItem(pItem->name, pItem->data, pItem->length, FALSE, pItem->type);
1086    }
1087}
1088
1089void
1090Package::removeItem(int32_t idx) {
1091    if(idx>=0) {
1092        // remove the item
1093        if(items[idx].isDataOwned) {
1094            free(items[idx].data);
1095        }
1096
1097        // move the following items up
1098        if((idx+1)<itemCount) {
1099            memmove(items+idx, items+idx+1, (itemCount-(idx+1))*sizeof(Item));
1100        }
1101        --itemCount;
1102
1103        if(idx<=findNextIndex) {
1104            --findNextIndex;
1105        }
1106    }
1107}
1108
1109void
1110Package::removeItems(const char *pattern) {
1111    int32_t idx;
1112
1113    findItems(pattern);
1114    while((idx=findNextItem())>=0) {
1115        removeItem(idx);
1116    }
1117}
1118
1119void
1120Package::removeItems(const Package &listPkg) {
1121    const Item *pItem;
1122    int32_t i;
1123
1124    for(pItem=listPkg.items, i=0; i<listPkg.itemCount; ++pItem, ++i) {
1125        removeItems(pItem->name);
1126    }
1127}
1128
1129void
1130Package::extractItem(const char *filesPath, const char *outName, int32_t idx, char outType) {
1131    char filename[1024];
1132    UDataSwapper *ds;
1133    FILE *file;
1134    Item *pItem;
1135    int32_t fileLength;
1136    uint8_t itemCharset, outCharset;
1137    UBool itemIsBigEndian, outIsBigEndian;
1138
1139    if(idx<0 || itemCount<=idx) {
1140        return;
1141    }
1142    pItem=items+idx;
1143
1144    // swap the data to the outType
1145    // outType==0: don't swap
1146    if(outType!=0 && pItem->type!=outType) {
1147        // open the swapper
1148        UErrorCode errorCode=U_ZERO_ERROR;
1149        makeTypeProps(pItem->type, itemCharset, itemIsBigEndian);
1150        makeTypeProps(outType, outCharset, outIsBigEndian);
1151        ds=udata_openSwapper(itemIsBigEndian, itemCharset, outIsBigEndian, outCharset, &errorCode);
1152        if(U_FAILURE(errorCode)) {
1153            fprintf(stderr, "icupkg: udata_openSwapper(item %ld) failed - %s\n",
1154                    (long)idx, u_errorName(errorCode));
1155            exit(errorCode);
1156        }
1157
1158        ds->printError=printPackageError;
1159        ds->printErrorContext=stderr;
1160
1161        // swap the item from its platform properties to the desired ones
1162        udata_swap(ds, pItem->data, pItem->length, pItem->data, &errorCode);
1163        if(U_FAILURE(errorCode)) {
1164            fprintf(stderr, "icupkg: udata_swap(item %ld) failed - %s\n", (long)idx, u_errorName(errorCode));
1165            exit(errorCode);
1166        }
1167        udata_closeSwapper(ds);
1168        pItem->type=outType;
1169    }
1170
1171    // create the file and write its contents
1172    makeFullFilenameAndDirs(filesPath, outName, filename, (int32_t)sizeof(filename));
1173    file=fopen(filename, "wb");
1174    if(file==NULL) {
1175        fprintf(stderr, "icupkg: unable to create file \"%s\"\n", filename);
1176        exit(U_FILE_ACCESS_ERROR);
1177    }
1178    fileLength=(int32_t)fwrite(pItem->data, 1, pItem->length, file);
1179
1180    if(ferror(file) || fileLength!=pItem->length) {
1181        fprintf(stderr, "icupkg: unable to write complete file \"%s\"\n", filename);
1182        exit(U_FILE_ACCESS_ERROR);
1183    }
1184    fclose(file);
1185}
1186
1187void
1188Package::extractItem(const char *filesPath, int32_t idx, char outType) {
1189    extractItem(filesPath, items[idx].name, idx, outType);
1190}
1191
1192void
1193Package::extractItems(const char *filesPath, const char *pattern, char outType) {
1194    int32_t idx;
1195
1196    findItems(pattern);
1197    while((idx=findNextItem())>=0) {
1198        extractItem(filesPath, idx, outType);
1199    }
1200}
1201
1202void
1203Package::extractItems(const char *filesPath, const Package &listPkg, char outType) {
1204    const Item *pItem;
1205    int32_t i;
1206
1207    for(pItem=listPkg.items, i=0; i<listPkg.itemCount; ++pItem, ++i) {
1208        extractItems(filesPath, pItem->name, outType);
1209    }
1210}
1211
1212int32_t
1213Package::getItemCount() const {
1214    return itemCount;
1215}
1216
1217const Item *
1218Package::getItem(int32_t idx) const {
1219    if (0 <= idx && idx < itemCount) {
1220        return &items[idx];
1221    }
1222    return NULL;
1223}
1224
1225void
1226Package::checkDependency(void *context, const char *itemName, const char *targetName) {
1227    // check dependency: make sure the target item is in the package
1228    Package *me=(Package *)context;
1229    if(me->findItem(targetName)<0) {
1230        me->isMissingItems=TRUE;
1231        fprintf(stderr, "Item %s depends on missing item %s\n", itemName, targetName);
1232    }
1233}
1234
1235UBool
1236Package::checkDependencies() {
1237    isMissingItems=FALSE;
1238    enumDependencies(this, checkDependency);
1239    return (UBool)!isMissingItems;
1240}
1241
1242void
1243Package::enumDependencies(void *context, CheckDependency check) {
1244    int32_t i;
1245
1246    for(i=0; i<itemCount; ++i) {
1247        enumDependencies(items+i, context, check);
1248    }
1249}
1250
1251char *
1252Package::allocString(UBool in, int32_t length) {
1253    char *p;
1254    int32_t top;
1255
1256    if(in) {
1257        top=inStringTop;
1258        p=inStrings+top;
1259    } else {
1260        top=outStringTop;
1261        p=outStrings+top;
1262    }
1263    top+=length+1;
1264
1265    if(top>STRING_STORE_SIZE) {
1266        fprintf(stderr, "icupkg: string storage overflow\n");
1267        exit(U_BUFFER_OVERFLOW_ERROR);
1268    }
1269    if(in) {
1270        inStringTop=top;
1271    } else {
1272        outStringTop=top;
1273    }
1274    return p;
1275}
1276
1277void
1278Package::sortItems() {
1279    UErrorCode errorCode=U_ZERO_ERROR;
1280    uprv_sortArray(items, itemCount, (int32_t)sizeof(Item), compareItems, NULL, FALSE, &errorCode);
1281    if(U_FAILURE(errorCode)) {
1282        fprintf(stderr, "icupkg: sorting item names failed - %s\n", u_errorName(errorCode));
1283        exit(errorCode);
1284    }
1285}
1286
1287void Package::setItemCapacity(int32_t max)
1288{
1289  if(max<=itemMax) {
1290    return;
1291  }
1292  Item *newItems = (Item*)uprv_malloc(max * sizeof(items[0]));
1293  Item *oldItems = items;
1294  if(newItems == NULL) {
1295    fprintf(stderr, "icupkg: Out of memory trying to allocate %lu bytes for %d items\n",
1296        (unsigned long)max*sizeof(items[0]), max);
1297    exit(U_MEMORY_ALLOCATION_ERROR);
1298  }
1299  if(items && itemCount>0) {
1300    uprv_memcpy(newItems, items, itemCount*sizeof(items[0]));
1301  }
1302  itemMax = max;
1303  items = newItems;
1304  uprv_free(oldItems);
1305}
1306
1307void Package::ensureItemCapacity()
1308{
1309  if((itemCount+1)>itemMax) {
1310    setItemCapacity(itemCount+kItemsChunk);
1311  }
1312}
1313
1314U_NAMESPACE_END
1315