package.cpp revision 85bf2e2fbc60a9f938064abc8127d61da7d19882
1/*
2*******************************************************************************
3*
4*   Copyright (C) 1999-2009, International Business Machines
5*   Corporation and others.  All Rights Reserved.
6*
7*******************************************************************************
8*   file name:  package.cpp
9*   encoding:   US-ASCII
10*   tab size:   8 (not used)
11*   indentation:4
12*
13*   created on: 2005aug25
14*   created by: Markus W. Scherer
15*
16*   Read, modify, and write ICU .dat data package files.
17*   This is an integral part of the icupkg tool, moved to the toolutil library
18*   because parts of tool implementations tend to be later shared by
19*   other tools.
20*   Subsumes functionality and implementation code from
21*   gencmn, decmn, and icuswap tools.
22*/
23
24#include "unicode/utypes.h"
25#include "unicode/putil.h"
26#include "unicode/udata.h"
27#include "cstring.h"
28#include "uarrsort.h"
29#include "ucmndata.h"
30#include "udataswp.h"
31#include "swapimpl.h"
32#include "toolutil.h"
33#include "package.h"
34
35#include <stdio.h>
36#include <stdlib.h>
37#include <string.h>
38
39// general definitions ----------------------------------------------------- ***
40
/*
 * Number of elements of a true array (not of a pointer), as an int32_t.
 * The whole expansion is parenthesized so that it behaves like a single
 * primary expression wherever it is used.
 */
#define LENGTHOF(array) ((int32_t)(sizeof(array)/sizeof((array)[0])))
42
43/* UDataInfo cf. udata.h */
44static const UDataInfo dataInfo={
45    (uint16_t)sizeof(UDataInfo),
46    0,
47
48    U_IS_BIG_ENDIAN,
49    U_CHARSET_FAMILY,
50    (uint8_t)sizeof(UChar),
51    0,
52
53    {0x43, 0x6d, 0x6e, 0x44},     /* dataFormat="CmnD" */
54    {1, 0, 0, 0},                 /* formatVersion */
55    {3, 0, 0, 0}                  /* dataVersion */
56};
57
58U_CDECL_BEGIN
59static void U_CALLCONV
60printPackageError(void *context, const char *fmt, va_list args) {
61    vfprintf((FILE *)context, fmt, args);
62}
63U_CDECL_END
64
/* Return x with its two bytes exchanged. */
static uint16_t
readSwapUInt16(uint16_t x) {
    const uint16_t lowToHigh=(uint16_t)(x<<8);
    const uint16_t highToLow=(uint16_t)(x>>8);

    return (uint16_t)(lowToHigh|highToLow);
}
69
70// platform types ---------------------------------------------------------- ***
71
// type letters, indexed by the type enum constants below;
// '?' occupies the TYPE_LE slot for which there is no letter
static const char *types="lb?e";

enum {
    TYPE_L,
    TYPE_B,
    TYPE_LE,
    TYPE_E,
    TYPE_COUNT
};
75
76static inline int32_t
77makeTypeEnum(uint8_t charset, UBool isBigEndian) {
78    return 2*(int32_t)charset+isBigEndian;
79}
80
81static inline int32_t
82makeTypeEnum(char type) {
83    return
84        type == 'l' ? TYPE_L :
85        type == 'b' ? TYPE_B :
86        type == 'e' ? TYPE_E :
87               -1;
88}
89
90static inline char
91makeTypeLetter(uint8_t charset, UBool isBigEndian) {
92    return types[makeTypeEnum(charset, isBigEndian)];
93}
94
95static inline char
96makeTypeLetter(int32_t typeEnum) {
97    return types[typeEnum];
98}
99
100static void
101makeTypeProps(char type, uint8_t &charset, UBool &isBigEndian) {
102    int32_t typeEnum=makeTypeEnum(type);
103    charset=(uint8_t)(typeEnum>>1);
104    isBigEndian=(UBool)(typeEnum&1);
105}
106
107U_CFUNC const UDataInfo *
108getDataInfo(const uint8_t *data, int32_t length,
109            int32_t &infoLength, int32_t &headerLength,
110            UErrorCode *pErrorCode) {
111    const DataHeader *pHeader;
112    const UDataInfo *pInfo;
113
114    if(pErrorCode==NULL || U_FAILURE(*pErrorCode)) {
115        return NULL;
116    }
117    if( data==NULL ||
118        (length>=0 && length<(int32_t)sizeof(DataHeader))
119    ) {
120        *pErrorCode=U_ILLEGAL_ARGUMENT_ERROR;
121        return NULL;
122    }
123
124    pHeader=(const DataHeader *)data;
125    pInfo=&pHeader->info;
126    if( (length>=0 && length<(int32_t)sizeof(DataHeader)) ||
127        pHeader->dataHeader.magic1!=0xda ||
128        pHeader->dataHeader.magic2!=0x27 ||
129        pInfo->sizeofUChar!=2
130    ) {
131        *pErrorCode=U_UNSUPPORTED_ERROR;
132        return NULL;
133    }
134
135    if(pInfo->isBigEndian==U_IS_BIG_ENDIAN) {
136        headerLength=pHeader->dataHeader.headerSize;
137        infoLength=pInfo->size;
138    } else {
139        headerLength=readSwapUInt16(pHeader->dataHeader.headerSize);
140        infoLength=readSwapUInt16(pInfo->size);
141    }
142
143    if( headerLength<(int32_t)sizeof(DataHeader) ||
144        infoLength<(int32_t)sizeof(UDataInfo) ||
145        headerLength<(int32_t)(sizeof(pHeader->dataHeader)+infoLength) ||
146        (length>=0 && length<headerLength)
147    ) {
148        *pErrorCode=U_UNSUPPORTED_ERROR;
149        return NULL;
150    }
151
152    return pInfo;
153}
154
155static int32_t
156getTypeEnumForInputData(const uint8_t *data, int32_t length,
157                        UErrorCode *pErrorCode) {
158    const UDataInfo *pInfo;
159    int32_t infoLength, headerLength;
160
161    /* getDataInfo() checks for illegal arguments */
162    pInfo=getDataInfo(data, length, infoLength, headerLength, pErrorCode);
163    if(pInfo==NULL) {
164        return -1;
165    }
166
167    return makeTypeEnum(pInfo->charsetFamily, (UBool)pInfo->isBigEndian);
168}
169
170// file handling ----------------------------------------------------------- ***
171
172static void
173extractPackageName(const char *filename,
174                   char pkg[], int32_t capacity) {
175    const char *basename;
176    int32_t len;
177
178    basename=findBasename(filename);
179    len=(int32_t)strlen(basename)-4; /* -4: subtract the length of ".dat" */
180
181    if(len<=0 || 0!=strcmp(basename+len, ".dat")) {
182        fprintf(stderr, "icupkg: \"%s\" is not recognized as a package filename (must end with .dat)\n",
183                         basename);
184        exit(U_ILLEGAL_ARGUMENT_ERROR);
185    }
186
187    if(len>=capacity) {
188        fprintf(stderr, "icupkg: the package name \"%s\" is too long (>=%ld)\n",
189                         basename, (long)capacity);
190        exit(U_ILLEGAL_ARGUMENT_ERROR);
191    }
192
193    memcpy(pkg, basename, len);
194    pkg[len]=0;
195}
196
/*
 * Return the length of the open file in bytes and rewind it to its start.
 * Returns -1 if seeking or telling fails, or if the length does not fit
 * into an int32_t (callers treat a non-positive length as an error).
 */
static int32_t
getFileLength(FILE *f) {
    long length;

    if(fseek(f, 0, SEEK_END)!=0) {
        return -1;
    }
    length=ftell(f);
    // always try to rewind, even if ftell() failed
    if(fseek(f, 0, SEEK_SET)!=0) {
        return -1;
    }
    if(length<0 || length>0x7fffffffL) {
        return -1;
    }
    return (int32_t)length;
}
206
/*
 * Replace, in place, every tree separator and every alternate file separator
 * in s with the normal file separator.
 * Compiles to nothing when all three separators are the same character.
 */
#if U_TREE_ENTRY_SEP_CHAR==U_FILE_SEP_CHAR && U_FILE_ALT_SEP_CHAR==U_FILE_SEP_CHAR
#define treeToPath(s)
#else
static void
treeToPath(char *s) {
    while(*s!=0) {
        if(*s==U_TREE_ENTRY_SEP_CHAR || *s==U_FILE_ALT_SEP_CHAR) {
            *s=U_FILE_SEP_CHAR;
        }
        ++s;
    }
}
#endif
224
/*
 * Replace, in place, every normal and every alternate file separator in s
 * with the tree separator.
 * Compiles to nothing when all three separators are the same character.
 */
#if U_TREE_ENTRY_SEP_CHAR==U_FILE_SEP_CHAR && U_FILE_ALT_SEP_CHAR==U_FILE_SEP_CHAR
#define pathToTree(s)
#else
static void
pathToTree(char *s) {
    while(*s!=0) {
        if(*s==U_FILE_SEP_CHAR || *s==U_FILE_ALT_SEP_CHAR) {
            *s=U_TREE_ENTRY_SEP_CHAR;
        }
        ++s;
    }
}
#endif
242
243/*
244 * Prepend the path (if any) to the name and run the name through treeToName().
245 */
246static void
247makeFullFilename(const char *path, const char *name,
248                 char *filename, int32_t capacity) {
249    char *s;
250
251    // prepend the path unless NULL or empty
252    if(path!=NULL && path[0]!=0) {
253        if((int32_t)(strlen(path)+1)>=capacity) {
254            fprintf(stderr, "pathname too long: \"%s\"\n", path);
255            exit(U_BUFFER_OVERFLOW_ERROR);
256        }
257        strcpy(filename, path);
258
259        // make sure the path ends with a file separator
260        s=strchr(filename, 0);
261        if(*(s-1)!=U_FILE_SEP_CHAR && *(s-1)!=U_FILE_ALT_SEP_CHAR) {
262            *s++=U_FILE_SEP_CHAR;
263        }
264    } else {
265        s=filename;
266    }
267
268    // turn the name into a filename, turn tree separators into file separators
269    if((int32_t)((s-filename)+strlen(name))>=capacity) {
270        fprintf(stderr, "path/filename too long: \"%s%s\"\n", filename, name);
271        exit(U_BUFFER_OVERFLOW_ERROR);
272    }
273    strcpy(s, name);
274    treeToPath(s);
275}
276
277static void
278makeFullFilenameAndDirs(const char *path, const char *name,
279                        char *filename, int32_t capacity) {
280    char *sep;
281    UErrorCode errorCode;
282
283    makeFullFilename(path, name, filename, capacity);
284
285    // make tree directories
286    errorCode=U_ZERO_ERROR;
287    sep=strchr(filename, 0)-strlen(name);
288    while((sep=strchr(sep, U_FILE_SEP_CHAR))!=NULL) {
289        if(sep!=filename) {
290            *sep=0;                 // truncate temporarily
291            uprv_mkdir(filename, &errorCode);
292            if(U_FAILURE(errorCode)) {
293                fprintf(stderr, "icupkg: unable to create tree directory \"%s\"\n", filename);
294                exit(U_FILE_ACCESS_ERROR);
295            }
296        }
297        *sep++=U_FILE_SEP_CHAR; // restore file separator character
298    }
299}
300
301static uint8_t *
302readFile(const char *path, const char *name, int32_t &length, char &type) {
303    char filename[1024];
304    FILE *file;
305    uint8_t *data;
306    UErrorCode errorCode;
307    int32_t fileLength, typeEnum;
308
309    makeFullFilename(path, name, filename, (int32_t)sizeof(filename));
310
311    /* open the input file, get its length, allocate memory for it, read the file */
312    file=fopen(filename, "rb");
313    if(file==NULL) {
314        fprintf(stderr, "icupkg: unable to open input file \"%s\"\n", filename);
315        exit(U_FILE_ACCESS_ERROR);
316    }
317
318    /* get the file length */
319    fileLength=getFileLength(file);
320    if(ferror(file) || fileLength<=0) {
321        fprintf(stderr, "icupkg: empty input file \"%s\"\n", filename);
322        fclose(file);
323        exit(U_FILE_ACCESS_ERROR);
324    }
325
326    /* allocate the buffer, pad to multiple of 16 */
327    length=(fileLength+0xf)&~0xf;
328    data=(uint8_t *)malloc(length);
329    if(data==NULL) {
330        fclose(file);
331        exit(U_MEMORY_ALLOCATION_ERROR);
332    }
333
334    /* read the file */
335    if(fileLength!=(int32_t)fread(data, 1, fileLength, file)) {
336        fprintf(stderr, "icupkg: error reading \"%s\"\n", filename);
337        fclose(file);
338        free(data);
339        exit(U_FILE_ACCESS_ERROR);
340    }
341
342    /* pad the file to a multiple of 16 using the usual padding byte */
343    if(fileLength<length) {
344        memset(data+fileLength, 0xaa, length-fileLength);
345    }
346
347    fclose(file);
348
349    // minimum check for ICU-format data
350    errorCode=U_ZERO_ERROR;
351    typeEnum=getTypeEnumForInputData(data, length, &errorCode);
352    if(typeEnum<0 || U_FAILURE(errorCode)) {
353        fprintf(stderr, "icupkg: not an ICU data file: \"%s\"\n", filename);
354        free(data);
355        exit(U_INVALID_FORMAT_ERROR);
356    }
357    type=makeTypeLetter(typeEnum);
358
359    return data;
360}
361
362// .dat package file representation ---------------------------------------- ***
363
364U_CDECL_BEGIN
365
366static int32_t U_CALLCONV
367compareItems(const void * /*context*/, const void *left, const void *right) {
368    U_NAMESPACE_USE
369
370    return (int32_t)strcmp(((Item *)left)->name, ((Item *)right)->name);
371}
372
373U_CDECL_END
374
375U_NAMESPACE_BEGIN
376
377Package::Package() {
378    inPkgName[0]=0;
379    inData=NULL;
380    inLength=0;
381    inCharset=U_CHARSET_FAMILY;
382    inIsBigEndian=U_IS_BIG_ENDIAN;
383
384    itemCount=0;
385    inStringTop=outStringTop=0;
386
387    matchMode=0;
388    findPrefix=findSuffix=NULL;
389    findPrefixLength=findSuffixLength=0;
390    findNextIndex=-1;
391
392    // create a header for an empty package
393    DataHeader *pHeader;
394    pHeader=(DataHeader *)header;
395    pHeader->dataHeader.magic1=0xda;
396    pHeader->dataHeader.magic2=0x27;
397    memcpy(&pHeader->info, &dataInfo, sizeof(dataInfo));
398    headerLength=(int32_t)(4+sizeof(dataInfo));
399    if(headerLength&0xf) {
400        /* NUL-pad the header to a multiple of 16 */
401        int32_t length=(headerLength+0xf)&~0xf;
402        memset(header+headerLength, 0, length-headerLength);
403        headerLength=length;
404    }
405    pHeader->dataHeader.headerSize=(uint16_t)headerLength;
406}
407
408Package::~Package() {
409    int32_t idx;
410
411    free(inData);
412
413    for(idx=0; idx<itemCount; ++idx) {
414        if(items[idx].isDataOwned) {
415            free(items[idx].data);
416        }
417    }
418}
419
420void
421Package::readPackage(const char *filename) {
422    UDataSwapper *ds;
423    const UDataInfo *pInfo;
424    UErrorCode errorCode;
425
426    const uint8_t *inBytes;
427
428    int32_t length, offset, i;
429    int32_t itemLength, typeEnum;
430    char type;
431
432    const UDataOffsetTOCEntry *inEntries;
433
434    extractPackageName(filename, inPkgName, (int32_t)sizeof(inPkgName));
435
436    /* read the file */
437    inData=readFile(NULL, filename, inLength, type);
438    length=inLength;
439
440    /*
441     * swap the header - even if the swapping itself is a no-op
442     * because it tells us the header length
443     */
444    errorCode=U_ZERO_ERROR;
445    makeTypeProps(type, inCharset, inIsBigEndian);
446    ds=udata_openSwapper(inIsBigEndian, inCharset, U_IS_BIG_ENDIAN, U_CHARSET_FAMILY, &errorCode);
447    if(U_FAILURE(errorCode)) {
448        fprintf(stderr, "icupkg: udata_openSwapper(\"%s\") failed - %s\n",
449                filename, u_errorName(errorCode));
450        exit(errorCode);
451    }
452
453    ds->printError=printPackageError;
454    ds->printErrorContext=stderr;
455
456    headerLength=sizeof(header);
457    if(length<headerLength) {
458        headerLength=length;
459    }
460    headerLength=udata_swapDataHeader(ds, inData, headerLength, header, &errorCode);
461    if(U_FAILURE(errorCode)) {
462        exit(errorCode);
463    }
464
465    /* check data format and format version */
466    pInfo=(const UDataInfo *)((const char *)inData+4);
467    if(!(
468        pInfo->dataFormat[0]==0x43 &&   /* dataFormat="CmnD" */
469        pInfo->dataFormat[1]==0x6d &&
470        pInfo->dataFormat[2]==0x6e &&
471        pInfo->dataFormat[3]==0x44 &&
472        pInfo->formatVersion[0]==1
473    )) {
474        fprintf(stderr, "icupkg: data format %02x.%02x.%02x.%02x (format version %02x) is not recognized as an ICU .dat package\n",
475                pInfo->dataFormat[0], pInfo->dataFormat[1],
476                pInfo->dataFormat[2], pInfo->dataFormat[3],
477                pInfo->formatVersion[0]);
478        exit(U_UNSUPPORTED_ERROR);
479    }
480    inIsBigEndian=(UBool)pInfo->isBigEndian;
481    inCharset=pInfo->charsetFamily;
482
483    inBytes=(const uint8_t *)inData+headerLength;
484    inEntries=(const UDataOffsetTOCEntry *)(inBytes+4);
485
486    /* check that the itemCount fits, then the ToC table, then at least the header of the last item */
487    length-=headerLength;
488    if(length<4) {
489        /* itemCount does not fit */
490        offset=0x7fffffff;
491    } else {
492        itemCount=udata_readInt32(ds, *(const int32_t *)inBytes);
493        if(itemCount==0) {
494            offset=4;
495        } else if(length<(4+8*itemCount)) {
496            /* ToC table does not fit */
497            offset=0x7fffffff;
498        } else {
499            /* offset of the last item plus at least 20 bytes for its header */
500            offset=20+(int32_t)ds->readUInt32(inEntries[itemCount-1].dataOffset);
501        }
502    }
503    if(length<offset) {
504        fprintf(stderr, "icupkg: too few bytes (%ld after header) for a .dat package\n",
505                        (long)length);
506        exit(U_INDEX_OUTOFBOUNDS_ERROR);
507    }
508    /* do not modify the package length variable until the last item's length is set */
509
510    if(itemCount>0) {
511        char prefix[MAX_PKG_NAME_LENGTH+4];
512        char *s, *inItemStrings;
513        int32_t inPkgNameLength, prefixLength, stringsOffset;
514
515        if(itemCount>MAX_FILE_COUNT) {
516            fprintf(stderr, "icupkg: too many items, maximum is %d\n", MAX_FILE_COUNT);
517            exit(U_BUFFER_OVERFLOW_ERROR);
518        }
519
520        /* swap the item name strings */
521        stringsOffset=4+8*itemCount;
522        itemLength=(int32_t)(ds->readUInt32(inEntries[0].dataOffset))-stringsOffset;
523
524        // don't include padding bytes at the end of the item names
525        while(itemLength>0 && inBytes[stringsOffset+itemLength-1]!=0) {
526            --itemLength;
527        }
528
529        if((inStringTop+itemLength)>STRING_STORE_SIZE) {
530            fprintf(stderr, "icupkg: total length of item name strings too long\n");
531            exit(U_BUFFER_OVERFLOW_ERROR);
532        }
533
534        inItemStrings=inStrings+inStringTop;
535        ds->swapInvChars(ds, inBytes+stringsOffset, itemLength, inItemStrings, &errorCode);
536        if(U_FAILURE(errorCode)) {
537            fprintf(stderr, "icupkg failed to swap the input .dat package item name strings\n");
538            exit(U_INVALID_FORMAT_ERROR);
539        }
540        inStringTop+=itemLength;
541
542        // reset the Item entries
543        memset(items, 0, itemCount*sizeof(Item));
544
545        inPkgNameLength=strlen(inPkgName);
546        memcpy(prefix, inPkgName, inPkgNameLength);
547        prefixLength=inPkgNameLength;
548
549        /*
550         * Get the common prefix of the items.
551         * New-style ICU .dat packages use tree separators ('/') between package names,
552         * tree names, and item names,
553         * while old-style ICU .dat packages (before multi-tree support)
554         * use an underscore ('_') between package and item names.
555         */
556        offset=(int32_t)ds->readUInt32(inEntries[0].nameOffset)-stringsOffset;
557        s=inItemStrings+offset;
558        if( (int32_t)strlen(s)>=(inPkgNameLength+2) &&
559            0==memcmp(s, inPkgName, inPkgNameLength) &&
560            s[inPkgNameLength]=='_'
561        ) {
562            // old-style .dat package
563            prefix[prefixLength++]='_';
564        } else {
565            // new-style .dat package
566            prefix[prefixLength++]=U_TREE_ENTRY_SEP_CHAR;
567            // if it turns out to not contain U_TREE_ENTRY_SEP_CHAR
568            // then the test in the loop below will fail
569        }
570        prefix[prefixLength]=0;
571
572        /* read the ToC table */
573        for(i=0; i<itemCount; ++i) {
574            // skip the package part of the item name, error if it does not match the actual package name
575            // or if nothing follows the package name
576            offset=(int32_t)ds->readUInt32(inEntries[i].nameOffset)-stringsOffset;
577            s=inItemStrings+offset;
578            if(0!=strncmp(s, prefix, prefixLength) || s[prefixLength]==0) {
579                fprintf(stderr, "icupkg: input .dat item name \"%s\" does not start with \"%s\"\n",
580                        s, prefix);
581                exit(U_UNSUPPORTED_ERROR);
582            }
583            items[i].name=s+prefixLength;
584
585            // set the item's data
586            items[i].data=(uint8_t *)inBytes+ds->readUInt32(inEntries[i].dataOffset);
587            if(i>0) {
588                items[i-1].length=(int32_t)(items[i].data-items[i-1].data);
589
590                // set the previous item's platform type
591                typeEnum=getTypeEnumForInputData(items[i-1].data, items[i-1].length, &errorCode);
592                if(typeEnum<0 || U_FAILURE(errorCode)) {
593                    fprintf(stderr, "icupkg: not an ICU data file: item \"%s\" in \"%s\"\n", items[i-1].name, filename);
594                    exit(U_INVALID_FORMAT_ERROR);
595                }
596                items[i-1].type=makeTypeLetter(typeEnum);
597            }
598            items[i].isDataOwned=FALSE;
599        }
600        // set the last item's length
601        items[itemCount-1].length=length-ds->readUInt32(inEntries[itemCount-1].dataOffset);
602
603        // set the last item's platform type
604        typeEnum=getTypeEnumForInputData(items[itemCount-1].data, items[itemCount-1].length, &errorCode);
605        if(typeEnum<0 || U_FAILURE(errorCode)) {
606            fprintf(stderr, "icupkg: not an ICU data file: item \"%s\" in \"%s\"\n", items[i-1].name, filename);
607            exit(U_INVALID_FORMAT_ERROR);
608        }
609        items[itemCount-1].type=makeTypeLetter(typeEnum);
610
611        if(type!=U_ICUDATA_TYPE_LETTER[0]) {
612            // sort the item names for the local charset
613            sortItems();
614        }
615    }
616
617    udata_closeSwapper(ds);
618}
619
620char
621Package::getInType() {
622    return makeTypeLetter(inCharset, inIsBigEndian);
623}
624
625void
626Package::writePackage(const char *filename, char outType, const char *comment) {
627    char prefix[MAX_PKG_NAME_LENGTH+4];
628    UDataOffsetTOCEntry entry;
629    UDataSwapper *dsLocalToOut, *ds[TYPE_COUNT];
630    FILE *file;
631    Item *pItem;
632    char *name;
633    UErrorCode errorCode;
634    int32_t i, length, prefixLength, maxItemLength, basenameOffset, offset, outInt32;
635    uint8_t outCharset;
636    UBool outIsBigEndian;
637
638    extractPackageName(filename, prefix, MAX_PKG_NAME_LENGTH);
639
640    // if there is an explicit comment, then use it, else use what's in the current header
641    if(comment!=NULL) {
642        /* get the header size minus the current comment */
643        DataHeader *pHeader;
644        int32_t length;
645
646        pHeader=(DataHeader *)header;
647        headerLength=4+pHeader->info.size;
648        length=(int32_t)strlen(comment);
649        if((int32_t)(headerLength+length)>=(int32_t)sizeof(header)) {
650            fprintf(stderr, "icupkg: comment too long\n");
651            exit(U_BUFFER_OVERFLOW_ERROR);
652        }
653        memcpy(header+headerLength, comment, length+1);
654        headerLength+=length;
655        if(headerLength&0xf) {
656            /* NUL-pad the header to a multiple of 16 */
657            length=(headerLength+0xf)&~0xf;
658            memset(header+headerLength, 0, length-headerLength);
659            headerLength=length;
660        }
661        pHeader->dataHeader.headerSize=(uint16_t)headerLength;
662    }
663
664    makeTypeProps(outType, outCharset, outIsBigEndian);
665
666    // open (TYPE_COUNT-2) swappers
667    // one is a no-op for local type==outType
668    // one type (TYPE_LE) is bogus
669    errorCode=U_ZERO_ERROR;
670    i=makeTypeEnum(outType);
671    ds[TYPE_B]= i==TYPE_B ? NULL : udata_openSwapper(TRUE, U_ASCII_FAMILY, outIsBigEndian, outCharset, &errorCode);
672    ds[TYPE_L]= i==TYPE_L ? NULL : udata_openSwapper(FALSE, U_ASCII_FAMILY, outIsBigEndian, outCharset, &errorCode);
673    ds[TYPE_LE]=NULL;
674    ds[TYPE_E]= i==TYPE_E ? NULL : udata_openSwapper(TRUE, U_EBCDIC_FAMILY, outIsBigEndian, outCharset, &errorCode);
675    if(U_FAILURE(errorCode)) {
676        fprintf(stderr, "icupkg: udata_openSwapper() failed - %s\n", u_errorName(errorCode));
677        exit(errorCode);
678    }
679    for(i=0; i<TYPE_COUNT; ++i) {
680        if(ds[i]!=NULL) {
681            ds[i]->printError=printPackageError;
682            ds[i]->printErrorContext=stderr;
683        }
684    }
685
686    dsLocalToOut=ds[makeTypeEnum(U_CHARSET_FAMILY, U_IS_BIG_ENDIAN)];
687
688    // create the file and write its contents
689    file=fopen(filename, "wb");
690    if(file==NULL) {
691        fprintf(stderr, "icupkg: unable to create file \"%s\"\n", filename);
692        exit(U_FILE_ACCESS_ERROR);
693    }
694
695    // swap and write the header
696    if(dsLocalToOut!=NULL) {
697        udata_swapDataHeader(dsLocalToOut, header, headerLength, header, &errorCode);
698        if(U_FAILURE(errorCode)) {
699            fprintf(stderr, "icupkg: udata_swapDataHeader(local to out) failed - %s\n", u_errorName(errorCode));
700            exit(errorCode);
701        }
702    }
703    length=(int32_t)fwrite(header, 1, headerLength, file);
704    if(length!=headerLength) {
705        fprintf(stderr, "icupkg: unable to write complete header to file \"%s\"\n", filename);
706        exit(U_FILE_ACCESS_ERROR);
707    }
708
709    // prepare and swap the package name with a tree separator
710    // for prepending to item names
711    strcat(prefix, U_TREE_ENTRY_SEP_STRING);
712    prefixLength=(int32_t)strlen(prefix);
713    if(dsLocalToOut!=NULL) {
714        dsLocalToOut->swapInvChars(dsLocalToOut, prefix, prefixLength, prefix, &errorCode);
715        if(U_FAILURE(errorCode)) {
716            fprintf(stderr, "icupkg: swapInvChars(output package name) failed - %s\n", u_errorName(errorCode));
717            exit(errorCode);
718        }
719
720        // swap and sort the item names (sorting needs to be done in the output charset)
721        dsLocalToOut->swapInvChars(dsLocalToOut, inStrings, inStringTop, inStrings, &errorCode);
722        if(U_FAILURE(errorCode)) {
723            fprintf(stderr, "icupkg: swapInvChars(item names) failed - %s\n", u_errorName(errorCode));
724            exit(errorCode);
725        }
726        sortItems();
727    }
728
729    // create the output item names in sorted order, with the package name prepended to each
730    for(i=0; i<itemCount; ++i) {
731        length=(int32_t)strlen(items[i].name);
732        name=allocString(FALSE, length+prefixLength);
733        memcpy(name, prefix, prefixLength);
734        memcpy(name+prefixLength, items[i].name, length+1);
735        items[i].name=name;
736    }
737
738    // calculate offsets for item names and items, pad to 16-align items
739    // align only the first item; each item's length is a multiple of 16
740    basenameOffset=4+8*itemCount;
741    offset=basenameOffset+outStringTop;
742    if((length=(offset&15))!=0) {
743        length=16-length;
744        memset(allocString(FALSE, length-1), 0xaa, length);
745        offset+=length;
746    }
747
748    // write the table of contents
749    // first the itemCount
750    outInt32=itemCount;
751    if(dsLocalToOut!=NULL) {
752        dsLocalToOut->swapArray32(dsLocalToOut, &outInt32, 4, &outInt32, &errorCode);
753        if(U_FAILURE(errorCode)) {
754            fprintf(stderr, "icupkg: swapArray32(item count) failed - %s\n", u_errorName(errorCode));
755            exit(errorCode);
756        }
757    }
758    length=(int32_t)fwrite(&outInt32, 1, 4, file);
759    if(length!=4) {
760        fprintf(stderr, "icupkg: unable to write complete item count to file \"%s\"\n", filename);
761        exit(U_FILE_ACCESS_ERROR);
762    }
763
764    // then write the item entries (and collect the maxItemLength)
765    maxItemLength=0;
766    for(i=0; i<itemCount; ++i) {
767        entry.nameOffset=(uint32_t)(basenameOffset+(items[i].name-outStrings));
768        entry.dataOffset=(uint32_t)offset;
769        if(dsLocalToOut!=NULL) {
770            dsLocalToOut->swapArray32(dsLocalToOut, &entry, 8, &entry, &errorCode);
771            if(U_FAILURE(errorCode)) {
772                fprintf(stderr, "icupkg: swapArray32(item entry %ld) failed - %s\n", (long)i, u_errorName(errorCode));
773                exit(errorCode);
774            }
775        }
776        length=(int32_t)fwrite(&entry, 1, 8, file);
777        if(length!=8) {
778            fprintf(stderr, "icupkg: unable to write complete item entry %ld to file \"%s\"\n", (long)i, filename);
779            exit(U_FILE_ACCESS_ERROR);
780        }
781
782        length=items[i].length;
783        if(length>maxItemLength) {
784            maxItemLength=length;
785        }
786        offset+=length;
787    }
788
789    // write the item names
790    length=(int32_t)fwrite(outStrings, 1, outStringTop, file);
791    if(length!=outStringTop) {
792        fprintf(stderr, "icupkg: unable to write complete item names to file \"%s\"\n", filename);
793        exit(U_FILE_ACCESS_ERROR);
794    }
795
796    // write the items
797    for(pItem=items, i=0; i<itemCount; ++pItem, ++i) {
798        int32_t type=makeTypeEnum(pItem->type);
799        if(ds[type]!=NULL) {
800            // swap each item from its platform properties to the desired ones
801            udata_swap(
802                ds[type],
803                pItem->data, pItem->length, pItem->data,
804                &errorCode);
805            if(U_FAILURE(errorCode)) {
806                fprintf(stderr, "icupkg: udata_swap(item %ld) failed - %s\n", (long)i, u_errorName(errorCode));
807                exit(errorCode);
808            }
809        }
810        length=(int32_t)fwrite(pItem->data, 1, pItem->length, file);
811        if(length!=pItem->length) {
812            fprintf(stderr, "icupkg: unable to write complete item %ld to file \"%s\"\n", (long)i, filename);
813            exit(U_FILE_ACCESS_ERROR);
814        }
815    }
816
817    if(ferror(file)) {
818        fprintf(stderr, "icupkg: unable to write complete file \"%s\"\n", filename);
819        exit(U_FILE_ACCESS_ERROR);
820    }
821
822    fclose(file);
823    for(i=0; i<TYPE_COUNT; ++i) {
824        udata_closeSwapper(ds[i]);
825    }
826}
827
828int32_t
829Package::findItem(const char *name, int32_t length) const {
830    int32_t i, start, limit;
831    int result;
832
833    /* do a binary search for the string */
834    start=0;
835    limit=itemCount;
836    while(start<limit) {
837        i=(start+limit)/2;
838        if(length>=0) {
839            result=strncmp(name, items[i].name, length);
840        } else {
841            result=strcmp(name, items[i].name);
842        }
843
844        if(result==0) {
845            /* found */
846            if(length>=0) {
847                /*
848                 * if we compared just prefixes, then we may need to back up
849                 * to the first item with this prefix
850                 */
851                while(i>0 && 0==strncmp(name, items[i-1].name, length)) {
852                    --i;
853                }
854            }
855            return i;
856        } else if(result<0) {
857            limit=i;
858        } else /* result>0 */ {
859            start=i+1;
860        }
861    }
862
863    return ~start; /* not found, return binary-not of the insertion point */
864}
865
866void
867Package::findItems(const char *pattern) {
868    const char *wild;
869
870    if(pattern==NULL || *pattern==0) {
871        findNextIndex=-1;
872        return;
873    }
874
875    findPrefix=pattern;
876    findSuffix=NULL;
877    findSuffixLength=0;
878
879    wild=strchr(pattern, '*');
880    if(wild==NULL) {
881        // no wildcard
882        findPrefixLength=(int32_t)strlen(pattern);
883    } else {
884        // one wildcard
885        findPrefixLength=(int32_t)(wild-pattern);
886        findSuffix=wild+1;
887        findSuffixLength=(int32_t)strlen(findSuffix);
888        if(NULL!=strchr(findSuffix, '*')) {
889            // two or more wildcards
890            fprintf(stderr, "icupkg: syntax error (more than one '*') in item pattern \"%s\"\n", pattern);
891            exit(U_PARSE_ERROR);
892        }
893    }
894
895    if(findPrefixLength==0) {
896        findNextIndex=0;
897    } else {
898        findNextIndex=findItem(findPrefix, findPrefixLength);
899    }
900}
901
902int32_t
903Package::findNextItem() {
904    const char *name, *middle, *treeSep;
905    int32_t idx, nameLength, middleLength;
906
907    if(findNextIndex<0) {
908        return -1;
909    }
910
911    while(findNextIndex<itemCount) {
912        idx=findNextIndex++;
913        name=items[idx].name;
914        nameLength=(int32_t)strlen(name);
915        if(nameLength<(findPrefixLength+findSuffixLength)) {
916            // item name too short for prefix & suffix
917            continue;
918        }
919        if(findPrefixLength>0 && 0!=memcmp(findPrefix, name, findPrefixLength)) {
920            // left the range of names with this prefix
921            break;
922        }
923        middle=name+findPrefixLength;
924        middleLength=nameLength-findPrefixLength-findSuffixLength;
925        if(findSuffixLength>0 && 0!=memcmp(findSuffix, name+(nameLength-findSuffixLength), findSuffixLength)) {
926            // suffix does not match
927            continue;
928        }
929        // prefix & suffix match
930
931        if(matchMode&MATCH_NOSLASH) {
932            treeSep=strchr(middle, U_TREE_ENTRY_SEP_CHAR);
933            if(treeSep!=NULL && (treeSep-middle)<middleLength) {
934                // the middle (matching the * wildcard) contains a tree separator /
935                continue;
936            }
937        }
938
939        // found a matching item
940        return idx;
941    }
942
943    // no more items
944    findNextIndex=-1;
945    return -1;
946}
947
948void
949Package::setMatchMode(uint32_t mode) {
950    matchMode=mode;
951}
952
953void
954Package::addItem(const char *name) {
955    addItem(name, NULL, 0, FALSE, U_ICUDATA_TYPE_LETTER[0]);
956}
957
958void
959Package::addItem(const char *name, uint8_t *data, int32_t length, UBool isDataOwned, char type) {
960    int32_t idx;
961
962    idx=findItem(name);
963    if(idx<0) {
964        // new item, make space at the insertion point
965        if(itemCount>=MAX_FILE_COUNT) {
966            fprintf(stderr, "icupkg: too many items, maximum is %d\n", MAX_FILE_COUNT);
967            exit(U_BUFFER_OVERFLOW_ERROR);
968        }
969        // move the following items down
970        idx=~idx;
971        if(idx<itemCount) {
972            memmove(items+idx+1, items+idx, (itemCount-idx)*sizeof(Item));
973        }
974        ++itemCount;
975
976        // reset this Item entry
977        memset(items+idx, 0, sizeof(Item));
978
979        // copy the item's name
980        items[idx].name=allocString(TRUE, strlen(name));
981        strcpy(items[idx].name, name);
982        pathToTree(items[idx].name);
983    } else {
984        // same-name item found, replace it
985        if(items[idx].isDataOwned) {
986            free(items[idx].data);
987        }
988
989        // keep the item's name since it is the same
990    }
991
992    // set the item's data
993    items[idx].data=data;
994    items[idx].length=length;
995    items[idx].isDataOwned=isDataOwned;
996    items[idx].type=type;
997}
998
999void
1000Package::addFile(const char *filesPath, const char *name) {
1001    uint8_t *data;
1002    int32_t length;
1003    char type;
1004
1005    data=readFile(filesPath, name, length, type);
1006    // readFile() exits the tool if it fails
1007    addItem(name, data, length, TRUE, type);
1008}
1009
1010void
1011Package::addItems(const Package &listPkg) {
1012    const Item *pItem;
1013    int32_t i;
1014
1015    for(pItem=listPkg.items, i=0; i<listPkg.itemCount; ++pItem, ++i) {
1016        addItem(pItem->name, pItem->data, pItem->length, FALSE, pItem->type);
1017    }
1018}
1019
1020void
1021Package::removeItem(int32_t idx) {
1022    if(idx>=0) {
1023        // remove the item
1024        if(items[idx].isDataOwned) {
1025            free(items[idx].data);
1026        }
1027
1028        // move the following items up
1029        if((idx+1)<itemCount) {
1030            memmove(items+idx, items+idx+1, (itemCount-(idx+1))*sizeof(Item));
1031        }
1032        --itemCount;
1033
1034        if(idx<=findNextIndex) {
1035            --findNextIndex;
1036        }
1037    }
1038}
1039
1040void
1041Package::removeItems(const char *pattern) {
1042    int32_t idx;
1043
1044    findItems(pattern);
1045    while((idx=findNextItem())>=0) {
1046        removeItem(idx);
1047    }
1048}
1049
1050void
1051Package::removeItems(const Package &listPkg) {
1052    const Item *pItem;
1053    int32_t i;
1054
1055    for(pItem=listPkg.items, i=0; i<listPkg.itemCount; ++pItem, ++i) {
1056        removeItems(pItem->name);
1057    }
1058}
1059
1060void
1061Package::extractItem(const char *filesPath, const char *outName, int32_t idx, char outType) {
1062    char filename[1024];
1063    UDataSwapper *ds;
1064    FILE *file;
1065    Item *pItem;
1066    int32_t fileLength;
1067    uint8_t itemCharset, outCharset;
1068    UBool itemIsBigEndian, outIsBigEndian;
1069
1070    if(idx<0 || itemCount<=idx) {
1071        return;
1072    }
1073    pItem=items+idx;
1074
1075    // swap the data to the outType
1076    // outType==0: don't swap
1077    if(outType!=0 && pItem->type!=outType) {
1078        // open the swapper
1079        UErrorCode errorCode=U_ZERO_ERROR;
1080        makeTypeProps(pItem->type, itemCharset, itemIsBigEndian);
1081        makeTypeProps(outType, outCharset, outIsBigEndian);
1082        ds=udata_openSwapper(itemIsBigEndian, itemCharset, outIsBigEndian, outCharset, &errorCode);
1083        if(U_FAILURE(errorCode)) {
1084            fprintf(stderr, "icupkg: udata_openSwapper(item %ld) failed - %s\n",
1085                    (long)idx, u_errorName(errorCode));
1086            exit(errorCode);
1087        }
1088
1089        ds->printError=printPackageError;
1090        ds->printErrorContext=stderr;
1091
1092        // swap the item from its platform properties to the desired ones
1093        udata_swap(ds, pItem->data, pItem->length, pItem->data, &errorCode);
1094        if(U_FAILURE(errorCode)) {
1095            fprintf(stderr, "icupkg: udata_swap(item %ld) failed - %s\n", (long)idx, u_errorName(errorCode));
1096            exit(errorCode);
1097        }
1098        udata_closeSwapper(ds);
1099    }
1100
1101    // create the file and write its contents
1102    makeFullFilenameAndDirs(filesPath, outName, filename, (int32_t)sizeof(filename));
1103    file=fopen(filename, "wb");
1104    if(file==NULL) {
1105        fprintf(stderr, "icupkg: unable to create file \"%s\"\n", filename);
1106        exit(U_FILE_ACCESS_ERROR);
1107    }
1108    fileLength=(int32_t)fwrite(pItem->data, 1, pItem->length, file);
1109
1110    if(ferror(file) || fileLength!=pItem->length) {
1111        fprintf(stderr, "icupkg: unable to write complete file \"%s\"\n", filename);
1112        exit(U_FILE_ACCESS_ERROR);
1113    }
1114    fclose(file);
1115}
1116
1117void
1118Package::extractItem(const char *filesPath, int32_t idx, char outType) {
1119    extractItem(filesPath, items[idx].name, idx, outType);
1120}
1121
1122void
1123Package::extractItems(const char *filesPath, const char *pattern, char outType) {
1124    int32_t idx;
1125
1126    findItems(pattern);
1127    while((idx=findNextItem())>=0) {
1128        extractItem(filesPath, idx, outType);
1129    }
1130}
1131
1132void
1133Package::extractItems(const char *filesPath, const Package &listPkg, char outType) {
1134    const Item *pItem;
1135    int32_t i;
1136
1137    for(pItem=listPkg.items, i=0; i<listPkg.itemCount; ++pItem, ++i) {
1138        extractItems(filesPath, pItem->name, outType);
1139    }
1140}
1141
1142int32_t
1143Package::getItemCount() const {
1144    return itemCount;
1145}
1146
1147const Item *
1148Package::getItem(int32_t idx) const {
1149    if (0 <= idx && idx < itemCount) {
1150        return &items[idx];
1151    }
1152    return NULL;
1153}
1154
1155void
1156Package::checkDependency(void *context, const char *itemName, const char *targetName) {
1157    // check dependency: make sure the target item is in the package
1158    Package *me=(Package *)context;
1159    if(me->findItem(targetName)<0) {
1160        me->isMissingItems=TRUE;
1161        fprintf(stderr, "Item %s depends on missing item %s\n", itemName, targetName);
1162    }
1163}
1164
1165UBool
1166Package::checkDependencies() {
1167    isMissingItems=FALSE;
1168    enumDependencies(this, checkDependency);
1169    return (UBool)!isMissingItems;
1170}
1171
1172void
1173Package::enumDependencies(void *context, CheckDependency check) {
1174    int32_t i;
1175
1176    for(i=0; i<itemCount; ++i) {
1177        enumDependencies(items+i, context, check);
1178    }
1179}
1180
1181char *
1182Package::allocString(UBool in, int32_t length) {
1183    char *p;
1184    int32_t top;
1185
1186    if(in) {
1187        top=inStringTop;
1188        p=inStrings+top;
1189    } else {
1190        top=outStringTop;
1191        p=outStrings+top;
1192    }
1193    top+=length+1;
1194
1195    if(top>STRING_STORE_SIZE) {
1196        fprintf(stderr, "icupkg: string storage overflow\n");
1197        exit(U_BUFFER_OVERFLOW_ERROR);
1198    }
1199    if(in) {
1200        inStringTop=top;
1201    } else {
1202        outStringTop=top;
1203    }
1204    return p;
1205}
1206
1207void
1208Package::sortItems() {
1209    UErrorCode errorCode=U_ZERO_ERROR;
1210    uprv_sortArray(items, itemCount, (int32_t)sizeof(Item), compareItems, NULL, FALSE, &errorCode);
1211    if(U_FAILURE(errorCode)) {
1212        fprintf(stderr, "icupkg: sorting item names failed - %s\n", u_errorName(errorCode));
1213        exit(errorCode);
1214    }
1215}
1216
1217U_NAMESPACE_END
1218