1/*
2*******************************************************************************
3*
4*   Copyright (C) 1999-2015, International Business Machines
5*   Corporation and others.  All Rights Reserved.
6*
7*******************************************************************************
8*   file name:  package.cpp
9*   encoding:   US-ASCII
10*   tab size:   8 (not used)
11*   indentation:4
12*
13*   created on: 2005aug25
14*   created by: Markus W. Scherer
15*
16*   Read, modify, and write ICU .dat data package files.
17*   This is an integral part of the icupkg tool, moved to the toolutil library
18*   because parts of tool implementations tend to be later shared by
19*   other tools.
20*   Subsumes functionality and implementation code from
21*   gencmn, decmn, and icuswap tools.
22*/
23
24#include "unicode/utypes.h"
25#include "unicode/putil.h"
26#include "unicode/udata.h"
27#include "cstring.h"
28#include "uarrsort.h"
29#include "ucmndata.h"
30#include "udataswp.h"
31#include "swapimpl.h"
32#include "toolutil.h"
33#include "package.h"
34#include "cmemory.h"
35
36#include <stdio.h>
37#include <stdlib.h>
38#include <string.h>
39
40
static const int32_t kItemsChunk = 256; /* How much to increase the files array by each time */
42
43// general definitions ----------------------------------------------------- ***
44
/*
 * UDataInfo cf. udata.h
 *
 * Describes a "CmnD" (.dat package) file for the platform this code is built on.
 * The Package constructor copies it into the header of an empty package.
 * The initializers are positional; see udata.h for the field order.
 */
static const UDataInfo dataInfo={
    (uint16_t)sizeof(UDataInfo),
    0,

    U_IS_BIG_ENDIAN,
    U_CHARSET_FAMILY,
    (uint8_t)sizeof(UChar),
    0,

    {0x43, 0x6d, 0x6e, 0x44},     /* dataFormat="CmnD" */
    {1, 0, 0, 0},                 /* formatVersion */
    {3, 0, 0, 0}                  /* dataVersion */
};
59
60U_CDECL_BEGIN
61static void U_CALLCONV
62printPackageError(void *context, const char *fmt, va_list args) {
63    vfprintf((FILE *)context, fmt, args);
64}
65U_CDECL_END
66
/* Returns x with its two bytes exchanged (opposite-endian reading of a 16-bit value). */
static uint16_t
readSwapUInt16(uint16_t x) {
    const uint16_t lowByte=(uint16_t)(x&0xff);
    const uint16_t highByte=(uint16_t)(x>>8);
    return (uint16_t)((lowByte<<8)|highByte);
}
71
72// platform types ---------------------------------------------------------- ***
73
/*
 * Platform type letters, indexed by the type enum values below.
 * The '?' at index TYPE_LE is a placeholder: makeTypeEnum(char) maps only
 * 'l', 'b' and 'e' back to an enum value.
 */
static const char *types="lb?e";

/*
 * Type enum values; makeTypeEnum(charset, isBigEndian) computes them
 * as 2*charset+isBigEndian. TYPE_COUNT is the number of values.
 */
enum { TYPE_L, TYPE_B, TYPE_LE, TYPE_E, TYPE_COUNT };
77
78static inline int32_t
79makeTypeEnum(uint8_t charset, UBool isBigEndian) {
80    return 2*(int32_t)charset+isBigEndian;
81}
82
83static inline int32_t
84makeTypeEnum(char type) {
85    return
86        type == 'l' ? TYPE_L :
87        type == 'b' ? TYPE_B :
88        type == 'e' ? TYPE_E :
89               -1;
90}
91
92static inline char
93makeTypeLetter(uint8_t charset, UBool isBigEndian) {
94    return types[makeTypeEnum(charset, isBigEndian)];
95}
96
97static inline char
98makeTypeLetter(int32_t typeEnum) {
99    return types[typeEnum];
100}
101
102static void
103makeTypeProps(char type, uint8_t &charset, UBool &isBigEndian) {
104    int32_t typeEnum=makeTypeEnum(type);
105    charset=(uint8_t)(typeEnum>>1);
106    isBigEndian=(UBool)(typeEnum&1);
107}
108
109U_CFUNC const UDataInfo *
110getDataInfo(const uint8_t *data, int32_t length,
111            int32_t &infoLength, int32_t &headerLength,
112            UErrorCode *pErrorCode) {
113    const DataHeader *pHeader;
114    const UDataInfo *pInfo;
115
116    if(pErrorCode==NULL || U_FAILURE(*pErrorCode)) {
117        return NULL;
118    }
119    if( data==NULL ||
120        (length>=0 && length<(int32_t)sizeof(DataHeader))
121    ) {
122        *pErrorCode=U_ILLEGAL_ARGUMENT_ERROR;
123        return NULL;
124    }
125
126    pHeader=(const DataHeader *)data;
127    pInfo=&pHeader->info;
128    if( (length>=0 && length<(int32_t)sizeof(DataHeader)) ||
129        pHeader->dataHeader.magic1!=0xda ||
130        pHeader->dataHeader.magic2!=0x27 ||
131        pInfo->sizeofUChar!=2
132    ) {
133        *pErrorCode=U_UNSUPPORTED_ERROR;
134        return NULL;
135    }
136
137    if(pInfo->isBigEndian==U_IS_BIG_ENDIAN) {
138        headerLength=pHeader->dataHeader.headerSize;
139        infoLength=pInfo->size;
140    } else {
141        headerLength=readSwapUInt16(pHeader->dataHeader.headerSize);
142        infoLength=readSwapUInt16(pInfo->size);
143    }
144
145    if( headerLength<(int32_t)sizeof(DataHeader) ||
146        infoLength<(int32_t)sizeof(UDataInfo) ||
147        headerLength<(int32_t)(sizeof(pHeader->dataHeader)+infoLength) ||
148        (length>=0 && length<headerLength)
149    ) {
150        *pErrorCode=U_UNSUPPORTED_ERROR;
151        return NULL;
152    }
153
154    return pInfo;
155}
156
157static int32_t
158getTypeEnumForInputData(const uint8_t *data, int32_t length,
159                        UErrorCode *pErrorCode) {
160    const UDataInfo *pInfo;
161    int32_t infoLength, headerLength;
162
163    /* getDataInfo() checks for illegal arguments */
164    pInfo=getDataInfo(data, length, infoLength, headerLength, pErrorCode);
165    if(pInfo==NULL) {
166        return -1;
167    }
168
169    return makeTypeEnum(pInfo->charsetFamily, (UBool)pInfo->isBigEndian);
170}
171
172// file handling ----------------------------------------------------------- ***
173
174static void
175extractPackageName(const char *filename,
176                   char pkg[], int32_t capacity) {
177    const char *basename;
178    int32_t len;
179
180    basename=findBasename(filename);
181    len=(int32_t)strlen(basename)-4; /* -4: subtract the length of ".dat" */
182
183    if(len<=0 || 0!=strcmp(basename+len, ".dat")) {
184        fprintf(stderr, "icupkg: \"%s\" is not recognized as a package filename (must end with .dat)\n",
185                         basename);
186        exit(U_ILLEGAL_ARGUMENT_ERROR);
187    }
188
189    if(len>=capacity) {
190        fprintf(stderr, "icupkg: the package name \"%s\" is too long (>=%ld)\n",
191                         basename, (long)capacity);
192        exit(U_ILLEGAL_ARGUMENT_ERROR);
193    }
194
195    memcpy(pkg, basename, len);
196    pkg[len]=0;
197}
198
/*
 * Returns the length of the open file f in bytes and rewinds it to the start.
 * Returns -1 if seeking or ftell() fails, or if the length does not fit into
 * an int32_t (callers treat any value <=0 as an error).
 */
static int32_t
getFileLength(FILE *f) {
    long size=-1;

    if(fseek(f, 0, SEEK_END)==0) {
        size=ftell(f);
    }
    // always try to rewind, as callers read from the start afterwards
    if(fseek(f, 0, SEEK_SET)!=0) {
        size=-1;
    }
    if(size<0 || size>0x7fffffffL) {
        return -1;
    }
    return (int32_t)size;
}
208
/*
 * Turn tree separators and alternate file separators into normal file separators.
 * Works in place on the NUL-terminated string s.
 * Compiles to nothing when all three separator characters are the same.
 */
#if U_TREE_ENTRY_SEP_CHAR==U_FILE_SEP_CHAR && U_FILE_ALT_SEP_CHAR==U_FILE_SEP_CHAR
#define treeToPath(s)
#else
static void
treeToPath(char *s) {
    while(*s!=0) {
        const char c=*s;
        if(c==U_TREE_ENTRY_SEP_CHAR || c==U_FILE_ALT_SEP_CHAR) {
            *s=U_FILE_SEP_CHAR;
        }
        ++s;
    }
}
#endif
226
/*
 * Turn file separators (normal and alternate) into tree separators.
 * Works in place on the NUL-terminated string s.
 * Compiles to nothing when all three separator characters are the same.
 */
#if U_TREE_ENTRY_SEP_CHAR==U_FILE_SEP_CHAR && U_FILE_ALT_SEP_CHAR==U_FILE_SEP_CHAR
#define pathToTree(s)
#else
static void
pathToTree(char *s) {
    while(*s!=0) {
        const char c=*s;
        if(c==U_FILE_SEP_CHAR || c==U_FILE_ALT_SEP_CHAR) {
            *s=U_TREE_ENTRY_SEP_CHAR;
        }
        ++s;
    }
}
#endif
244
245/*
246 * Prepend the path (if any) to the name and run the name through treeToName().
247 */
248static void
249makeFullFilename(const char *path, const char *name,
250                 char *filename, int32_t capacity) {
251    char *s;
252
253    // prepend the path unless NULL or empty
254    if(path!=NULL && path[0]!=0) {
255        if((int32_t)(strlen(path)+1)>=capacity) {
256            fprintf(stderr, "pathname too long: \"%s\"\n", path);
257            exit(U_BUFFER_OVERFLOW_ERROR);
258        }
259        strcpy(filename, path);
260
261        // make sure the path ends with a file separator
262        s=strchr(filename, 0);
263        if(*(s-1)!=U_FILE_SEP_CHAR && *(s-1)!=U_FILE_ALT_SEP_CHAR) {
264            *s++=U_FILE_SEP_CHAR;
265        }
266    } else {
267        s=filename;
268    }
269
270    // turn the name into a filename, turn tree separators into file separators
271    if((int32_t)((s-filename)+strlen(name))>=capacity) {
272        fprintf(stderr, "path/filename too long: \"%s%s\"\n", filename, name);
273        exit(U_BUFFER_OVERFLOW_ERROR);
274    }
275    strcpy(s, name);
276    treeToPath(s);
277}
278
279static void
280makeFullFilenameAndDirs(const char *path, const char *name,
281                        char *filename, int32_t capacity) {
282    char *sep;
283    UErrorCode errorCode;
284
285    makeFullFilename(path, name, filename, capacity);
286
287    // make tree directories
288    errorCode=U_ZERO_ERROR;
289    sep=strchr(filename, 0)-strlen(name);
290    while((sep=strchr(sep, U_FILE_SEP_CHAR))!=NULL) {
291        if(sep!=filename) {
292            *sep=0;                 // truncate temporarily
293            uprv_mkdir(filename, &errorCode);
294            if(U_FAILURE(errorCode)) {
295                fprintf(stderr, "icupkg: unable to create tree directory \"%s\"\n", filename);
296                exit(U_FILE_ACCESS_ERROR);
297            }
298        }
299        *sep++=U_FILE_SEP_CHAR; // restore file separator character
300    }
301}
302
303static uint8_t *
304readFile(const char *path, const char *name, int32_t &length, char &type) {
305    char filename[1024];
306    FILE *file;
307    UErrorCode errorCode;
308    int32_t fileLength, typeEnum;
309
310    makeFullFilename(path, name, filename, (int32_t)sizeof(filename));
311
312    /* open the input file, get its length, allocate memory for it, read the file */
313    file=fopen(filename, "rb");
314    if(file==NULL) {
315        fprintf(stderr, "icupkg: unable to open input file \"%s\"\n", filename);
316        exit(U_FILE_ACCESS_ERROR);
317    }
318
319    /* get the file length */
320    fileLength=getFileLength(file);
321    if(ferror(file) || fileLength<=0) {
322        fprintf(stderr, "icupkg: empty input file \"%s\"\n", filename);
323        fclose(file);
324        exit(U_FILE_ACCESS_ERROR);
325    }
326
327    /* allocate the buffer, pad to multiple of 16 */
328    length=(fileLength+0xf)&~0xf;
329    icu::LocalMemory<uint8_t> data((uint8_t *)uprv_malloc(length));
330    if(data.isNull()) {
331        fclose(file);
332        fprintf(stderr, "icupkg: malloc error allocating %d bytes.\n", (int)length);
333        exit(U_MEMORY_ALLOCATION_ERROR);
334    }
335
336    /* read the file */
337    if(fileLength!=(int32_t)fread(data.getAlias(), 1, fileLength, file)) {
338        fprintf(stderr, "icupkg: error reading \"%s\"\n", filename);
339        fclose(file);
340        exit(U_FILE_ACCESS_ERROR);
341    }
342
343    /* pad the file to a multiple of 16 using the usual padding byte */
344    if(fileLength<length) {
345        memset(data.getAlias()+fileLength, 0xaa, length-fileLength);
346    }
347
348    fclose(file);
349
350    // minimum check for ICU-format data
351    errorCode=U_ZERO_ERROR;
352    typeEnum=getTypeEnumForInputData(data.getAlias(), length, &errorCode);
353    if(typeEnum<0 || U_FAILURE(errorCode)) {
354        fprintf(stderr, "icupkg: not an ICU data file: \"%s\"\n", filename);
355#if !UCONFIG_NO_LEGACY_CONVERSION
356        exit(U_INVALID_FORMAT_ERROR);
357#else
358        fprintf(stderr, "U_INVALID_FORMAT_ERROR occurred but UCONFIG_NO_LEGACY_CONVERSION is on so this is expected.\n");
359        exit(0);
360#endif
361    }
362    type=makeTypeLetter(typeEnum);
363
364    return data.orphan();
365}
366
367// .dat package file representation ---------------------------------------- ***
368
369U_CDECL_BEGIN
370
371static int32_t U_CALLCONV
372compareItems(const void * /*context*/, const void *left, const void *right) {
373    U_NAMESPACE_USE
374
375    return (int32_t)strcmp(((Item *)left)->name, ((Item *)right)->name);
376}
377
378U_CDECL_END
379
380U_NAMESPACE_BEGIN
381
382Package::Package()
383        : doAutoPrefix(FALSE), prefixEndsWithType(FALSE) {
384    inPkgName[0]=0;
385    pkgPrefix[0]=0;
386    inData=NULL;
387    inLength=0;
388    inCharset=U_CHARSET_FAMILY;
389    inIsBigEndian=U_IS_BIG_ENDIAN;
390
391    itemCount=0;
392    itemMax=0;
393    items=NULL;
394
395    inStringTop=outStringTop=0;
396
397    matchMode=0;
398    findPrefix=findSuffix=NULL;
399    findPrefixLength=findSuffixLength=0;
400    findNextIndex=-1;
401
402    // create a header for an empty package
403    DataHeader *pHeader;
404    pHeader=(DataHeader *)header;
405    pHeader->dataHeader.magic1=0xda;
406    pHeader->dataHeader.magic2=0x27;
407    memcpy(&pHeader->info, &dataInfo, sizeof(dataInfo));
408    headerLength=(int32_t)(4+sizeof(dataInfo));
409    if(headerLength&0xf) {
410        /* NUL-pad the header to a multiple of 16 */
411        int32_t length=(headerLength+0xf)&~0xf;
412        memset(header+headerLength, 0, length-headerLength);
413        headerLength=length;
414    }
415    pHeader->dataHeader.headerSize=(uint16_t)headerLength;
416}
417
418Package::~Package() {
419    int32_t idx;
420
421    uprv_free(inData);
422
423    for(idx=0; idx<itemCount; ++idx) {
424        if(items[idx].isDataOwned) {
425            uprv_free(items[idx].data);
426        }
427    }
428
429    uprv_free((void*)items);
430}
431
432void
433Package::setPrefix(const char *p) {
434    if(strlen(p)>=sizeof(pkgPrefix)) {
435        fprintf(stderr, "icupkg: --toc_prefix %s too long\n", p);
436        exit(U_ILLEGAL_ARGUMENT_ERROR);
437    }
438    strcpy(pkgPrefix, p);
439}
440
441void
442Package::readPackage(const char *filename) {
443    UDataSwapper *ds;
444    const UDataInfo *pInfo;
445    UErrorCode errorCode;
446
447    const uint8_t *inBytes;
448
449    int32_t length, offset, i;
450    int32_t itemLength, typeEnum;
451    char type;
452
453    const UDataOffsetTOCEntry *inEntries;
454
455    extractPackageName(filename, inPkgName, (int32_t)sizeof(inPkgName));
456
457    /* read the file */
458    inData=readFile(NULL, filename, inLength, type);
459    length=inLength;
460
461    /*
462     * swap the header - even if the swapping itself is a no-op
463     * because it tells us the header length
464     */
465    errorCode=U_ZERO_ERROR;
466    makeTypeProps(type, inCharset, inIsBigEndian);
467    ds=udata_openSwapper(inIsBigEndian, inCharset, U_IS_BIG_ENDIAN, U_CHARSET_FAMILY, &errorCode);
468    if(U_FAILURE(errorCode)) {
469        fprintf(stderr, "icupkg: udata_openSwapper(\"%s\") failed - %s\n",
470                filename, u_errorName(errorCode));
471        exit(errorCode);
472    }
473
474    ds->printError=printPackageError;
475    ds->printErrorContext=stderr;
476
477    headerLength=sizeof(header);
478    if(length<headerLength) {
479        headerLength=length;
480    }
481    headerLength=udata_swapDataHeader(ds, inData, headerLength, header, &errorCode);
482    if(U_FAILURE(errorCode)) {
483        exit(errorCode);
484    }
485
486    /* check data format and format version */
487    pInfo=(const UDataInfo *)((const char *)inData+4);
488    if(!(
489        pInfo->dataFormat[0]==0x43 &&   /* dataFormat="CmnD" */
490        pInfo->dataFormat[1]==0x6d &&
491        pInfo->dataFormat[2]==0x6e &&
492        pInfo->dataFormat[3]==0x44 &&
493        pInfo->formatVersion[0]==1
494    )) {
495        fprintf(stderr, "icupkg: data format %02x.%02x.%02x.%02x (format version %02x) is not recognized as an ICU .dat package\n",
496                pInfo->dataFormat[0], pInfo->dataFormat[1],
497                pInfo->dataFormat[2], pInfo->dataFormat[3],
498                pInfo->formatVersion[0]);
499        exit(U_UNSUPPORTED_ERROR);
500    }
501    inIsBigEndian=(UBool)pInfo->isBigEndian;
502    inCharset=pInfo->charsetFamily;
503
504    inBytes=(const uint8_t *)inData+headerLength;
505    inEntries=(const UDataOffsetTOCEntry *)(inBytes+4);
506
507    /* check that the itemCount fits, then the ToC table, then at least the header of the last item */
508    length-=headerLength;
509    if(length<4) {
510        /* itemCount does not fit */
511        offset=0x7fffffff;
512    } else {
513        itemCount=udata_readInt32(ds, *(const int32_t *)inBytes);
514        setItemCapacity(itemCount); /* resize so there's space */
515        if(itemCount==0) {
516            offset=4;
517        } else if(length<(4+8*itemCount)) {
518            /* ToC table does not fit */
519            offset=0x7fffffff;
520        } else {
521            /* offset of the last item plus at least 20 bytes for its header */
522            offset=20+(int32_t)ds->readUInt32(inEntries[itemCount-1].dataOffset);
523        }
524    }
525    if(length<offset) {
526        fprintf(stderr, "icupkg: too few bytes (%ld after header) for a .dat package\n",
527                        (long)length);
528        exit(U_INDEX_OUTOFBOUNDS_ERROR);
529    }
530    /* do not modify the package length variable until the last item's length is set */
531
532    if(itemCount<=0) {
533        if(doAutoPrefix) {
534            fprintf(stderr, "icupkg: --auto_toc_prefix[_with_type] but the input package is empty\n");
535            exit(U_INVALID_FORMAT_ERROR);
536        }
537    } else {
538        char prefix[MAX_PKG_NAME_LENGTH+4];
539        char *s, *inItemStrings;
540
541        if(itemCount>itemMax) {
542            fprintf(stderr, "icupkg: too many items, maximum is %d\n", itemMax);
543            exit(U_BUFFER_OVERFLOW_ERROR);
544        }
545
546        /* swap the item name strings */
547        int32_t stringsOffset=4+8*itemCount;
548        itemLength=(int32_t)(ds->readUInt32(inEntries[0].dataOffset))-stringsOffset;
549
550        // don't include padding bytes at the end of the item names
551        while(itemLength>0 && inBytes[stringsOffset+itemLength-1]!=0) {
552            --itemLength;
553        }
554
555        if((inStringTop+itemLength)>STRING_STORE_SIZE) {
556            fprintf(stderr, "icupkg: total length of item name strings too long\n");
557            exit(U_BUFFER_OVERFLOW_ERROR);
558        }
559
560        inItemStrings=inStrings+inStringTop;
561        ds->swapInvChars(ds, inBytes+stringsOffset, itemLength, inItemStrings, &errorCode);
562        if(U_FAILURE(errorCode)) {
563            fprintf(stderr, "icupkg failed to swap the input .dat package item name strings\n");
564            exit(U_INVALID_FORMAT_ERROR);
565        }
566        inStringTop+=itemLength;
567
568        // reset the Item entries
569        memset(items, 0, itemCount*sizeof(Item));
570
571        /*
572         * Get the common prefix of the items.
573         * New-style ICU .dat packages use tree separators ('/') between package names,
574         * tree names, and item names,
575         * while old-style ICU .dat packages (before multi-tree support)
576         * use an underscore ('_') between package and item names.
577         */
578        offset=(int32_t)ds->readUInt32(inEntries[0].nameOffset)-stringsOffset;
579        s=inItemStrings+offset;  // name of the first entry
580        int32_t prefixLength;
581        if(doAutoPrefix) {
582            // Use the first entry's prefix. Must be a new-style package.
583            const char *prefixLimit=strchr(s, U_TREE_ENTRY_SEP_CHAR);
584            if(prefixLimit==NULL) {
585                fprintf(stderr,
586                        "icupkg: --auto_toc_prefix[_with_type] but "
587                        "the first entry \"%s\" does not contain a '%c'\n",
588                        s, U_TREE_ENTRY_SEP_CHAR);
589                exit(U_INVALID_FORMAT_ERROR);
590            }
591            prefixLength=(int32_t)(prefixLimit-s);
592            if(prefixLength==0 || prefixLength>=UPRV_LENGTHOF(pkgPrefix)) {
593                fprintf(stderr,
594                        "icupkg: --auto_toc_prefix[_with_type] but "
595                        "the prefix of the first entry \"%s\" is empty or too long\n",
596                        s);
597                exit(U_INVALID_FORMAT_ERROR);
598            }
599            if(prefixEndsWithType && s[prefixLength-1]!=type) {
600                fprintf(stderr,
601                        "icupkg: --auto_toc_prefix_with_type but "
602                        "the prefix of the first entry \"%s\" does not end with '%c'\n",
603                        s, type);
604                exit(U_INVALID_FORMAT_ERROR);
605            }
606            memcpy(pkgPrefix, s, prefixLength);
607            pkgPrefix[prefixLength]=0;
608            memcpy(prefix, s, ++prefixLength);  // include the /
609        } else {
610            // Use the package basename as prefix.
611            int32_t inPkgNameLength=strlen(inPkgName);
612            memcpy(prefix, inPkgName, inPkgNameLength);
613            prefixLength=inPkgNameLength;
614
615            if( (int32_t)strlen(s)>=(inPkgNameLength+2) &&
616                0==memcmp(s, inPkgName, inPkgNameLength) &&
617                s[inPkgNameLength]=='_'
618            ) {
619                // old-style .dat package
620                prefix[prefixLength++]='_';
621            } else {
622                // new-style .dat package
623                prefix[prefixLength++]=U_TREE_ENTRY_SEP_CHAR;
624                // if it turns out to not contain U_TREE_ENTRY_SEP_CHAR
625                // then the test in the loop below will fail
626            }
627        }
628        prefix[prefixLength]=0;
629
630        /* read the ToC table */
631        for(i=0; i<itemCount; ++i) {
632            // skip the package part of the item name, error if it does not match the actual package name
633            // or if nothing follows the package name
634            offset=(int32_t)ds->readUInt32(inEntries[i].nameOffset)-stringsOffset;
635            s=inItemStrings+offset;
636            if(0!=strncmp(s, prefix, prefixLength) || s[prefixLength]==0) {
637                fprintf(stderr, "icupkg: input .dat item name \"%s\" does not start with \"%s\"\n",
638                        s, prefix);
639                exit(U_INVALID_FORMAT_ERROR);
640            }
641            items[i].name=s+prefixLength;
642
643            // set the item's data
644            items[i].data=(uint8_t *)inBytes+ds->readUInt32(inEntries[i].dataOffset);
645            if(i>0) {
646                items[i-1].length=(int32_t)(items[i].data-items[i-1].data);
647
648                // set the previous item's platform type
649                typeEnum=getTypeEnumForInputData(items[i-1].data, items[i-1].length, &errorCode);
650                if(typeEnum<0 || U_FAILURE(errorCode)) {
651                    fprintf(stderr, "icupkg: not an ICU data file: item \"%s\" in \"%s\"\n", items[i-1].name, filename);
652                    exit(U_INVALID_FORMAT_ERROR);
653                }
654                items[i-1].type=makeTypeLetter(typeEnum);
655            }
656            items[i].isDataOwned=FALSE;
657        }
658        // set the last item's length
659        items[itemCount-1].length=length-ds->readUInt32(inEntries[itemCount-1].dataOffset);
660
661        // set the last item's platform type
662        typeEnum=getTypeEnumForInputData(items[itemCount-1].data, items[itemCount-1].length, &errorCode);
663        if(typeEnum<0 || U_FAILURE(errorCode)) {
664            fprintf(stderr, "icupkg: not an ICU data file: item \"%s\" in \"%s\"\n", items[i-1].name, filename);
665            exit(U_INVALID_FORMAT_ERROR);
666        }
667        items[itemCount-1].type=makeTypeLetter(typeEnum);
668
669        if(type!=U_ICUDATA_TYPE_LETTER[0]) {
670            // sort the item names for the local charset
671            sortItems();
672        }
673    }
674
675    udata_closeSwapper(ds);
676}
677
678char
679Package::getInType() {
680    return makeTypeLetter(inCharset, inIsBigEndian);
681}
682
683void
684Package::writePackage(const char *filename, char outType, const char *comment) {
685    char prefix[MAX_PKG_NAME_LENGTH+4];
686    UDataOffsetTOCEntry entry;
687    UDataSwapper *dsLocalToOut, *ds[TYPE_COUNT];
688    FILE *file;
689    Item *pItem;
690    char *name;
691    UErrorCode errorCode;
692    int32_t i, length, prefixLength, maxItemLength, basenameOffset, offset, outInt32;
693    uint8_t outCharset;
694    UBool outIsBigEndian;
695
696    extractPackageName(filename, prefix, MAX_PKG_NAME_LENGTH);
697
698    // if there is an explicit comment, then use it, else use what's in the current header
699    if(comment!=NULL) {
700        /* get the header size minus the current comment */
701        DataHeader *pHeader;
702        int32_t length;
703
704        pHeader=(DataHeader *)header;
705        headerLength=4+pHeader->info.size;
706        length=(int32_t)strlen(comment);
707        if((int32_t)(headerLength+length)>=(int32_t)sizeof(header)) {
708            fprintf(stderr, "icupkg: comment too long\n");
709            exit(U_BUFFER_OVERFLOW_ERROR);
710        }
711        memcpy(header+headerLength, comment, length+1);
712        headerLength+=length;
713        if(headerLength&0xf) {
714            /* NUL-pad the header to a multiple of 16 */
715            length=(headerLength+0xf)&~0xf;
716            memset(header+headerLength, 0, length-headerLength);
717            headerLength=length;
718        }
719        pHeader->dataHeader.headerSize=(uint16_t)headerLength;
720    }
721
722    makeTypeProps(outType, outCharset, outIsBigEndian);
723
724    // open (TYPE_COUNT-2) swappers
725    // one is a no-op for local type==outType
726    // one type (TYPE_LE) is bogus
727    errorCode=U_ZERO_ERROR;
728    i=makeTypeEnum(outType);
729    ds[TYPE_B]= i==TYPE_B ? NULL : udata_openSwapper(TRUE, U_ASCII_FAMILY, outIsBigEndian, outCharset, &errorCode);
730    ds[TYPE_L]= i==TYPE_L ? NULL : udata_openSwapper(FALSE, U_ASCII_FAMILY, outIsBigEndian, outCharset, &errorCode);
731    ds[TYPE_LE]=NULL;
732    ds[TYPE_E]= i==TYPE_E ? NULL : udata_openSwapper(TRUE, U_EBCDIC_FAMILY, outIsBigEndian, outCharset, &errorCode);
733    if(U_FAILURE(errorCode)) {
734        fprintf(stderr, "icupkg: udata_openSwapper() failed - %s\n", u_errorName(errorCode));
735        exit(errorCode);
736    }
737    for(i=0; i<TYPE_COUNT; ++i) {
738        if(ds[i]!=NULL) {
739            ds[i]->printError=printPackageError;
740            ds[i]->printErrorContext=stderr;
741        }
742    }
743
744    dsLocalToOut=ds[makeTypeEnum(U_CHARSET_FAMILY, U_IS_BIG_ENDIAN)];
745
746    // create the file and write its contents
747    file=fopen(filename, "wb");
748    if(file==NULL) {
749        fprintf(stderr, "icupkg: unable to create file \"%s\"\n", filename);
750        exit(U_FILE_ACCESS_ERROR);
751    }
752
753    // swap and write the header
754    if(dsLocalToOut!=NULL) {
755        udata_swapDataHeader(dsLocalToOut, header, headerLength, header, &errorCode);
756        if(U_FAILURE(errorCode)) {
757            fprintf(stderr, "icupkg: udata_swapDataHeader(local to out) failed - %s\n", u_errorName(errorCode));
758            exit(errorCode);
759        }
760    }
761    length=(int32_t)fwrite(header, 1, headerLength, file);
762    if(length!=headerLength) {
763        fprintf(stderr, "icupkg: unable to write complete header to file \"%s\"\n", filename);
764        exit(U_FILE_ACCESS_ERROR);
765    }
766
767    // prepare and swap the package name with a tree separator
768    // for prepending to item names
769    if(pkgPrefix[0]==0) {
770        prefixLength=(int32_t)strlen(prefix);
771    } else {
772        prefixLength=(int32_t)strlen(pkgPrefix);
773        memcpy(prefix, pkgPrefix, prefixLength);
774        if(prefixEndsWithType) {
775            prefix[prefixLength-1]=outType;
776        }
777    }
778    prefix[prefixLength++]=U_TREE_ENTRY_SEP_CHAR;
779    prefix[prefixLength]=0;
780    if(dsLocalToOut!=NULL) {
781        dsLocalToOut->swapInvChars(dsLocalToOut, prefix, prefixLength, prefix, &errorCode);
782        if(U_FAILURE(errorCode)) {
783            fprintf(stderr, "icupkg: swapInvChars(output package name) failed - %s\n", u_errorName(errorCode));
784            exit(errorCode);
785        }
786
787        // swap and sort the item names (sorting needs to be done in the output charset)
788        dsLocalToOut->swapInvChars(dsLocalToOut, inStrings, inStringTop, inStrings, &errorCode);
789        if(U_FAILURE(errorCode)) {
790            fprintf(stderr, "icupkg: swapInvChars(item names) failed - %s\n", u_errorName(errorCode));
791            exit(errorCode);
792        }
793        sortItems();
794    }
795
796    // create the output item names in sorted order, with the package name prepended to each
797    for(i=0; i<itemCount; ++i) {
798        length=(int32_t)strlen(items[i].name);
799        name=allocString(FALSE, length+prefixLength);
800        memcpy(name, prefix, prefixLength);
801        memcpy(name+prefixLength, items[i].name, length+1);
802        items[i].name=name;
803    }
804
805    // calculate offsets for item names and items, pad to 16-align items
806    // align only the first item; each item's length is a multiple of 16
807    basenameOffset=4+8*itemCount;
808    offset=basenameOffset+outStringTop;
809    if((length=(offset&15))!=0) {
810        length=16-length;
811        memset(allocString(FALSE, length-1), 0xaa, length);
812        offset+=length;
813    }
814
815    // write the table of contents
816    // first the itemCount
817    outInt32=itemCount;
818    if(dsLocalToOut!=NULL) {
819        dsLocalToOut->swapArray32(dsLocalToOut, &outInt32, 4, &outInt32, &errorCode);
820        if(U_FAILURE(errorCode)) {
821            fprintf(stderr, "icupkg: swapArray32(item count) failed - %s\n", u_errorName(errorCode));
822            exit(errorCode);
823        }
824    }
825    length=(int32_t)fwrite(&outInt32, 1, 4, file);
826    if(length!=4) {
827        fprintf(stderr, "icupkg: unable to write complete item count to file \"%s\"\n", filename);
828        exit(U_FILE_ACCESS_ERROR);
829    }
830
831    // then write the item entries (and collect the maxItemLength)
832    maxItemLength=0;
833    for(i=0; i<itemCount; ++i) {
834        entry.nameOffset=(uint32_t)(basenameOffset+(items[i].name-outStrings));
835        entry.dataOffset=(uint32_t)offset;
836        if(dsLocalToOut!=NULL) {
837            dsLocalToOut->swapArray32(dsLocalToOut, &entry, 8, &entry, &errorCode);
838            if(U_FAILURE(errorCode)) {
839                fprintf(stderr, "icupkg: swapArray32(item entry %ld) failed - %s\n", (long)i, u_errorName(errorCode));
840                exit(errorCode);
841            }
842        }
843        length=(int32_t)fwrite(&entry, 1, 8, file);
844        if(length!=8) {
845            fprintf(stderr, "icupkg: unable to write complete item entry %ld to file \"%s\"\n", (long)i, filename);
846            exit(U_FILE_ACCESS_ERROR);
847        }
848
849        length=items[i].length;
850        if(length>maxItemLength) {
851            maxItemLength=length;
852        }
853        offset+=length;
854    }
855
856    // write the item names
857    length=(int32_t)fwrite(outStrings, 1, outStringTop, file);
858    if(length!=outStringTop) {
859        fprintf(stderr, "icupkg: unable to write complete item names to file \"%s\"\n", filename);
860        exit(U_FILE_ACCESS_ERROR);
861    }
862
863    // write the items
864    for(pItem=items, i=0; i<itemCount; ++pItem, ++i) {
865        int32_t type=makeTypeEnum(pItem->type);
866        if(ds[type]!=NULL) {
867            // swap each item from its platform properties to the desired ones
868            udata_swap(
869                ds[type],
870                pItem->data, pItem->length, pItem->data,
871                &errorCode);
872            if(U_FAILURE(errorCode)) {
873                fprintf(stderr, "icupkg: udata_swap(item %ld) failed - %s\n", (long)i, u_errorName(errorCode));
874                exit(errorCode);
875            }
876        }
877        length=(int32_t)fwrite(pItem->data, 1, pItem->length, file);
878        if(length!=pItem->length) {
879            fprintf(stderr, "icupkg: unable to write complete item %ld to file \"%s\"\n", (long)i, filename);
880            exit(U_FILE_ACCESS_ERROR);
881        }
882    }
883
884    if(ferror(file)) {
885        fprintf(stderr, "icupkg: unable to write complete file \"%s\"\n", filename);
886        exit(U_FILE_ACCESS_ERROR);
887    }
888
889    fclose(file);
890    for(i=0; i<TYPE_COUNT; ++i) {
891        udata_closeSwapper(ds[i]);
892    }
893}
894
895int32_t
896Package::findItem(const char *name, int32_t length) const {
897    int32_t i, start, limit;
898    int result;
899
900    /* do a binary search for the string */
901    start=0;
902    limit=itemCount;
903    while(start<limit) {
904        i=(start+limit)/2;
905        if(length>=0) {
906            result=strncmp(name, items[i].name, length);
907        } else {
908            result=strcmp(name, items[i].name);
909        }
910
911        if(result==0) {
912            /* found */
913            if(length>=0) {
914                /*
915                 * if we compared just prefixes, then we may need to back up
916                 * to the first item with this prefix
917                 */
918                while(i>0 && 0==strncmp(name, items[i-1].name, length)) {
919                    --i;
920                }
921            }
922            return i;
923        } else if(result<0) {
924            limit=i;
925        } else /* result>0 */ {
926            start=i+1;
927        }
928    }
929
930    return ~start; /* not found, return binary-not of the insertion point */
931}
932
933void
934Package::findItems(const char *pattern) {
935    const char *wild;
936
937    if(pattern==NULL || *pattern==0) {
938        findNextIndex=-1;
939        return;
940    }
941
942    findPrefix=pattern;
943    findSuffix=NULL;
944    findSuffixLength=0;
945
946    wild=strchr(pattern, '*');
947    if(wild==NULL) {
948        // no wildcard
949        findPrefixLength=(int32_t)strlen(pattern);
950    } else {
951        // one wildcard
952        findPrefixLength=(int32_t)(wild-pattern);
953        findSuffix=wild+1;
954        findSuffixLength=(int32_t)strlen(findSuffix);
955        if(NULL!=strchr(findSuffix, '*')) {
956            // two or more wildcards
957            fprintf(stderr, "icupkg: syntax error (more than one '*') in item pattern \"%s\"\n", pattern);
958            exit(U_PARSE_ERROR);
959        }
960    }
961
962    if(findPrefixLength==0) {
963        findNextIndex=0;
964    } else {
965        findNextIndex=findItem(findPrefix, findPrefixLength);
966    }
967}
968
969int32_t
970Package::findNextItem() {
971    const char *name, *middle, *treeSep;
972    int32_t idx, nameLength, middleLength;
973
974    if(findNextIndex<0) {
975        return -1;
976    }
977
978    while(findNextIndex<itemCount) {
979        idx=findNextIndex++;
980        name=items[idx].name;
981        nameLength=(int32_t)strlen(name);
982        if(nameLength<(findPrefixLength+findSuffixLength)) {
983            // item name too short for prefix & suffix
984            continue;
985        }
986        if(findPrefixLength>0 && 0!=memcmp(findPrefix, name, findPrefixLength)) {
987            // left the range of names with this prefix
988            break;
989        }
990        middle=name+findPrefixLength;
991        middleLength=nameLength-findPrefixLength-findSuffixLength;
992        if(findSuffixLength>0 && 0!=memcmp(findSuffix, name+(nameLength-findSuffixLength), findSuffixLength)) {
993            // suffix does not match
994            continue;
995        }
996        // prefix & suffix match
997
998        if(matchMode&MATCH_NOSLASH) {
999            treeSep=strchr(middle, U_TREE_ENTRY_SEP_CHAR);
1000            if(treeSep!=NULL && (treeSep-middle)<middleLength) {
1001                // the middle (matching the * wildcard) contains a tree separator /
1002                continue;
1003            }
1004        }
1005
1006        // found a matching item
1007        return idx;
1008    }
1009
1010    // no more items
1011    findNextIndex=-1;
1012    return -1;
1013}
1014
1015void
1016Package::setMatchMode(uint32_t mode) {
1017    matchMode=mode;
1018}
1019
1020void
1021Package::addItem(const char *name) {
1022    addItem(name, NULL, 0, FALSE, U_ICUDATA_TYPE_LETTER[0]);
1023}
1024
1025void
1026Package::addItem(const char *name, uint8_t *data, int32_t length, UBool isDataOwned, char type) {
1027    int32_t idx;
1028
1029    idx=findItem(name);
1030    if(idx<0) {
1031        // new item, make space at the insertion point
1032        ensureItemCapacity();
1033        // move the following items down
1034        idx=~idx;
1035        if(idx<itemCount) {
1036            memmove(items+idx+1, items+idx, (itemCount-idx)*sizeof(Item));
1037        }
1038        ++itemCount;
1039
1040        // reset this Item entry
1041        memset(items+idx, 0, sizeof(Item));
1042
1043        // copy the item's name
1044        items[idx].name=allocString(TRUE, strlen(name));
1045        strcpy(items[idx].name, name);
1046        pathToTree(items[idx].name);
1047    } else {
1048        // same-name item found, replace it
1049        if(items[idx].isDataOwned) {
1050            uprv_free(items[idx].data);
1051        }
1052
1053        // keep the item's name since it is the same
1054    }
1055
1056    // set the item's data
1057    items[idx].data=data;
1058    items[idx].length=length;
1059    items[idx].isDataOwned=isDataOwned;
1060    items[idx].type=type;
1061}
1062
1063void
1064Package::addFile(const char *filesPath, const char *name) {
1065    uint8_t *data;
1066    int32_t length;
1067    char type;
1068
1069    data=readFile(filesPath, name, length, type);
1070    // readFile() exits the tool if it fails
1071    addItem(name, data, length, TRUE, type);
1072}
1073
1074void
1075Package::addItems(const Package &listPkg) {
1076    const Item *pItem;
1077    int32_t i;
1078
1079    for(pItem=listPkg.items, i=0; i<listPkg.itemCount; ++pItem, ++i) {
1080        addItem(pItem->name, pItem->data, pItem->length, FALSE, pItem->type);
1081    }
1082}
1083
1084void
1085Package::removeItem(int32_t idx) {
1086    if(idx>=0) {
1087        // remove the item
1088        if(items[idx].isDataOwned) {
1089            uprv_free(items[idx].data);
1090        }
1091
1092        // move the following items up
1093        if((idx+1)<itemCount) {
1094            memmove(items+idx, items+idx+1, (itemCount-(idx+1))*sizeof(Item));
1095        }
1096        --itemCount;
1097
1098        if(idx<=findNextIndex) {
1099            --findNextIndex;
1100        }
1101    }
1102}
1103
1104void
1105Package::removeItems(const char *pattern) {
1106    int32_t idx;
1107
1108    findItems(pattern);
1109    while((idx=findNextItem())>=0) {
1110        removeItem(idx);
1111    }
1112}
1113
1114void
1115Package::removeItems(const Package &listPkg) {
1116    const Item *pItem;
1117    int32_t i;
1118
1119    for(pItem=listPkg.items, i=0; i<listPkg.itemCount; ++pItem, ++i) {
1120        removeItems(pItem->name);
1121    }
1122}
1123
1124void
1125Package::extractItem(const char *filesPath, const char *outName, int32_t idx, char outType) {
1126    char filename[1024];
1127    UDataSwapper *ds;
1128    FILE *file;
1129    Item *pItem;
1130    int32_t fileLength;
1131    uint8_t itemCharset, outCharset;
1132    UBool itemIsBigEndian, outIsBigEndian;
1133
1134    if(idx<0 || itemCount<=idx) {
1135        return;
1136    }
1137    pItem=items+idx;
1138
1139    // swap the data to the outType
1140    // outType==0: don't swap
1141    if(outType!=0 && pItem->type!=outType) {
1142        // open the swapper
1143        UErrorCode errorCode=U_ZERO_ERROR;
1144        makeTypeProps(pItem->type, itemCharset, itemIsBigEndian);
1145        makeTypeProps(outType, outCharset, outIsBigEndian);
1146        ds=udata_openSwapper(itemIsBigEndian, itemCharset, outIsBigEndian, outCharset, &errorCode);
1147        if(U_FAILURE(errorCode)) {
1148            fprintf(stderr, "icupkg: udata_openSwapper(item %ld) failed - %s\n",
1149                    (long)idx, u_errorName(errorCode));
1150            exit(errorCode);
1151        }
1152
1153        ds->printError=printPackageError;
1154        ds->printErrorContext=stderr;
1155
1156        // swap the item from its platform properties to the desired ones
1157        udata_swap(ds, pItem->data, pItem->length, pItem->data, &errorCode);
1158        if(U_FAILURE(errorCode)) {
1159            fprintf(stderr, "icupkg: udata_swap(item %ld) failed - %s\n", (long)idx, u_errorName(errorCode));
1160            exit(errorCode);
1161        }
1162        udata_closeSwapper(ds);
1163        pItem->type=outType;
1164    }
1165
1166    // create the file and write its contents
1167    makeFullFilenameAndDirs(filesPath, outName, filename, (int32_t)sizeof(filename));
1168    file=fopen(filename, "wb");
1169    if(file==NULL) {
1170        fprintf(stderr, "icupkg: unable to create file \"%s\"\n", filename);
1171        exit(U_FILE_ACCESS_ERROR);
1172    }
1173    fileLength=(int32_t)fwrite(pItem->data, 1, pItem->length, file);
1174
1175    if(ferror(file) || fileLength!=pItem->length) {
1176        fprintf(stderr, "icupkg: unable to write complete file \"%s\"\n", filename);
1177        exit(U_FILE_ACCESS_ERROR);
1178    }
1179    fclose(file);
1180}
1181
1182void
1183Package::extractItem(const char *filesPath, int32_t idx, char outType) {
1184    extractItem(filesPath, items[idx].name, idx, outType);
1185}
1186
1187void
1188Package::extractItems(const char *filesPath, const char *pattern, char outType) {
1189    int32_t idx;
1190
1191    findItems(pattern);
1192    while((idx=findNextItem())>=0) {
1193        extractItem(filesPath, idx, outType);
1194    }
1195}
1196
1197void
1198Package::extractItems(const char *filesPath, const Package &listPkg, char outType) {
1199    const Item *pItem;
1200    int32_t i;
1201
1202    for(pItem=listPkg.items, i=0; i<listPkg.itemCount; ++pItem, ++i) {
1203        extractItems(filesPath, pItem->name, outType);
1204    }
1205}
1206
1207int32_t
1208Package::getItemCount() const {
1209    return itemCount;
1210}
1211
1212const Item *
1213Package::getItem(int32_t idx) const {
1214    if (0 <= idx && idx < itemCount) {
1215        return &items[idx];
1216    }
1217    return NULL;
1218}
1219
1220void
1221Package::checkDependency(void *context, const char *itemName, const char *targetName) {
1222    // check dependency: make sure the target item is in the package
1223    Package *me=(Package *)context;
1224    if(me->findItem(targetName)<0) {
1225        me->isMissingItems=TRUE;
1226        fprintf(stderr, "Item %s depends on missing item %s\n", itemName, targetName);
1227    }
1228}
1229
1230UBool
1231Package::checkDependencies() {
1232    isMissingItems=FALSE;
1233    enumDependencies(this, checkDependency);
1234    return (UBool)!isMissingItems;
1235}
1236
1237void
1238Package::enumDependencies(void *context, CheckDependency check) {
1239    int32_t i;
1240
1241    for(i=0; i<itemCount; ++i) {
1242        enumDependencies(items+i, context, check);
1243    }
1244}
1245
1246char *
1247Package::allocString(UBool in, int32_t length) {
1248    char *p;
1249    int32_t top;
1250
1251    if(in) {
1252        top=inStringTop;
1253        p=inStrings+top;
1254    } else {
1255        top=outStringTop;
1256        p=outStrings+top;
1257    }
1258    top+=length+1;
1259
1260    if(top>STRING_STORE_SIZE) {
1261        fprintf(stderr, "icupkg: string storage overflow\n");
1262        exit(U_BUFFER_OVERFLOW_ERROR);
1263    }
1264    if(in) {
1265        inStringTop=top;
1266    } else {
1267        outStringTop=top;
1268    }
1269    return p;
1270}
1271
1272void
1273Package::sortItems() {
1274    UErrorCode errorCode=U_ZERO_ERROR;
1275    uprv_sortArray(items, itemCount, (int32_t)sizeof(Item), compareItems, NULL, FALSE, &errorCode);
1276    if(U_FAILURE(errorCode)) {
1277        fprintf(stderr, "icupkg: sorting item names failed - %s\n", u_errorName(errorCode));
1278        exit(errorCode);
1279    }
1280}
1281
1282void Package::setItemCapacity(int32_t max)
1283{
1284  if(max<=itemMax) {
1285    return;
1286  }
1287  Item *newItems = (Item*)uprv_malloc(max * sizeof(items[0]));
1288  Item *oldItems = items;
1289  if(newItems == NULL) {
1290    fprintf(stderr, "icupkg: Out of memory trying to allocate %lu bytes for %d items\n",
1291        (unsigned long)max*sizeof(items[0]), max);
1292    exit(U_MEMORY_ALLOCATION_ERROR);
1293  }
1294  if(items && itemCount>0) {
1295    uprv_memcpy(newItems, items, itemCount*sizeof(items[0]));
1296  }
1297  itemMax = max;
1298  items = newItems;
1299  uprv_free(oldItems);
1300}
1301
1302void Package::ensureItemCapacity()
1303{
1304  if((itemCount+1)>itemMax) {
1305    setItemCapacity(itemCount+kItemsChunk);
1306  }
1307}
1308
1309U_NAMESPACE_END
1310