1/*
2*******************************************************************************
3*
4*   Copyright (C) 2003-2011, International Business Machines
5*   Corporation and others.  All Rights Reserved.
6*
7*******************************************************************************
8*   file name:  pkgitems.cpp
9*   encoding:   US-ASCII
10*   tab size:   8 (not used)
11*   indentation:4
12*
13*   created on: 2005sep18
14*   created by: Markus W. Scherer
15*
16*   Companion file to package.cpp. Deals with details of ICU data item formats.
17*   Used for item dependencies.
18*   Contains adapted code from ucnv_bld.c (swapper code from 2003).
19*/
20
21#include "unicode/utypes.h"
22#include "unicode/ures.h"
23#include "unicode/putil.h"
24#include "unicode/udata.h"
25#include "cstring.h"
26#include "uinvchar.h"
27#include "ucmndata.h"
28#include "udataswp.h"
29#include "swapimpl.h"
30#include "toolutil.h"
31#include "package.h"
32#include "pkg_imp.h"
33
34#include <stdio.h>
35#include <stdlib.h>
36#include <string.h>
37
38/* item formats in common */
39
40#include "uresdata.h"
41#include "ucnv_bld.h"
42#include "ucnv_io.h"
43
44// general definitions ----------------------------------------------------- ***
45
/* Number of elements of a real array (not a pointer), as an int32_t. The whole expansion is parenthesized so that it behaves as a single operand. */
#define LENGTHOF(array) ((int32_t)(sizeof(array)/sizeof((array)[0])))
47
48U_CDECL_BEGIN
49
50static void U_CALLCONV
51printError(void *context, const char *fmt, va_list args) {
52    vfprintf((FILE *)context, fmt, args);
53}
54
55U_CDECL_END
56
57// a data item in native-platform form ------------------------------------- ***
58
59U_NAMESPACE_BEGIN
60
61class NativeItem {
62public:
63    NativeItem() : pItem(NULL), pInfo(NULL), bytes(NULL), swapped(NULL), length(0) {}
64    NativeItem(const Item *item, UDataSwapFn *swap) : swapped(NULL) {
65        setItem(item, swap);
66    }
67    ~NativeItem() {
68        delete [] swapped;
69    }
70    const UDataInfo *getDataInfo() const {
71        return pInfo;
72    }
73    const uint8_t *getBytes() const {
74        return bytes;
75    }
76    int32_t getLength() const {
77        return length;
78    }
79
80    void setItem(const Item *item, UDataSwapFn *swap) {
81        pItem=item;
82        int32_t infoLength, itemHeaderLength;
83        UErrorCode errorCode=U_ZERO_ERROR;
84        pInfo=::getDataInfo(pItem->data, pItem->length, infoLength, itemHeaderLength, &errorCode);
85        if(U_FAILURE(errorCode)) {
86            exit(errorCode); // should succeed because readFile() checks headers
87        }
88        length=pItem->length-itemHeaderLength;
89
90        if(pInfo->isBigEndian==U_IS_BIG_ENDIAN && pInfo->charsetFamily==U_CHARSET_FAMILY) {
91            bytes=pItem->data+itemHeaderLength;
92        } else {
93            UDataSwapper *ds=udata_openSwapper((UBool)pInfo->isBigEndian, pInfo->charsetFamily, U_IS_BIG_ENDIAN, U_CHARSET_FAMILY, &errorCode);
94            if(U_FAILURE(errorCode)) {
95                fprintf(stderr, "icupkg: udata_openSwapper(\"%s\") failed - %s\n",
96                        pItem->name, u_errorName(errorCode));
97                exit(errorCode);
98            }
99
100            ds->printError=printError;
101            ds->printErrorContext=stderr;
102
103            swapped=new uint8_t[pItem->length];
104            if(swapped==NULL) {
105                fprintf(stderr, "icupkg: unable to allocate memory for swapping \"%s\"\n", pItem->name);
106                exit(U_MEMORY_ALLOCATION_ERROR);
107            }
108            swap(ds, pItem->data, pItem->length, swapped, &errorCode);
109            pInfo=::getDataInfo(swapped, pItem->length, infoLength, itemHeaderLength, &errorCode);
110            bytes=swapped+itemHeaderLength;
111            udata_closeSwapper(ds);
112        }
113    }
114
115private:
116    const Item *pItem;
117    const UDataInfo *pInfo;
118    const uint8_t *bytes;
119    uint8_t *swapped;
120    int32_t length;
121};
122
123// check a dependency ------------------------------------------------------ ***
124
125/*
126 * assemble the target item name from the source item name, an ID
127 * and a suffix
128 */
129static void
130makeTargetName(const char *itemName, const char *id, int32_t idLength, const char *suffix,
131               char *target, int32_t capacity,
132               UErrorCode *pErrorCode) {
133    const char *itemID;
134    int32_t treeLength, suffixLength, targetLength;
135
136    // get the item basename
137    itemID=strrchr(itemName, '/');
138    if(itemID!=NULL) {
139        ++itemID;
140    } else {
141        itemID=itemName;
142    }
143
144    // build the target string
145    treeLength=(int32_t)(itemID-itemName);
146    if(idLength<0) {
147        idLength=(int32_t)strlen(id);
148    }
149    suffixLength=(int32_t)strlen(suffix);
150    targetLength=treeLength+idLength+suffixLength;
151    if(targetLength>=capacity) {
152        fprintf(stderr, "icupkg/makeTargetName(%s) target item name length %ld too long\n",
153                        itemName, (long)targetLength);
154        *pErrorCode=U_BUFFER_OVERFLOW_ERROR;
155        return;
156    }
157
158    memcpy(target, itemName, treeLength);
159    memcpy(target+treeLength, id, idLength);
160    memcpy(target+treeLength+idLength, suffix, suffixLength+1); // +1 includes the terminating NUL
161}
162
163static void
164checkIDSuffix(const char *itemName, const char *id, int32_t idLength, const char *suffix,
165              CheckDependency check, void *context,
166              UErrorCode *pErrorCode) {
167    char target[200];
168    makeTargetName(itemName, id, idLength, suffix, target, (int32_t)sizeof(target), pErrorCode);
169    if(U_SUCCESS(*pErrorCode)) {
170        check(context, itemName, target);
171    }
172}
173
174/* assemble the target item name from the item's parent item name */
175static void
176checkParent(const char *itemName, CheckDependency check, void *context,
177            UErrorCode *pErrorCode) {
178    const char *itemID, *parent, *parentLimit, *suffix;
179    int32_t parentLength;
180
181    // get the item basename
182    itemID=strrchr(itemName, '/');
183    if(itemID!=NULL) {
184        ++itemID;
185    } else {
186        itemID=itemName;
187    }
188
189    // get the item suffix
190    suffix=strrchr(itemID, '.');
191    if(suffix==NULL) {
192        // empty suffix, point to the end of the string
193        suffix=strrchr(itemID, 0);
194    }
195
196    // get the position of the last '_'
197    for(parentLimit=suffix; parentLimit>itemID && *--parentLimit!='_';) {}
198
199    if(parentLimit!=itemID) {
200        // get the parent item name by truncating the last part of this item's name */
201        parent=itemID;
202        parentLength=(int32_t)(parentLimit-itemID);
203    } else {
204        // no '_' in the item name: the parent is the root bundle
205        parent="root";
206        parentLength=4;
207        if((suffix-itemID)==parentLength && 0==memcmp(itemID, parent, parentLength)) {
208            // the item itself is "root", which does not depend on a parent
209            return;
210        }
211    }
212    checkIDSuffix(itemName, parent, parentLength, suffix, check, context, pErrorCode);
213}
214
215// get dependencies from resource bundles ---------------------------------- ***
216
// U+002F '/': separates the locale ID from the resource path in alias strings
static const UChar SLASH=0x2f;
218
219/*
220 * Check for the alias from the string or alias resource res.
221 */
222static void
223checkAlias(const char *itemName,
224           Resource res, const UChar *alias, int32_t length, UBool useResSuffix,
225           CheckDependency check, void *context, UErrorCode *pErrorCode) {
226    int32_t i;
227
228    if(!uprv_isInvariantUString(alias, length)) {
229        fprintf(stderr, "icupkg/ures_enumDependencies(%s res=%08x) alias string contains non-invariant characters\n",
230                        itemName, res);
231        *pErrorCode=U_INVALID_CHAR_FOUND;
232        return;
233    }
234
235    // extract the locale ID from alias strings like
236    // locale_ID/key1/key2/key3
237    // locale_ID
238
239    // search for the first slash
240    for(i=0; i<length && alias[i]!=SLASH; ++i) {}
241
242    if(res_getPublicType(res)==URES_ALIAS) {
243        // ignore aliases with an initial slash:
244        // /ICUDATA/... and /pkgname/... go to a different package
245        // /LOCALE/... are for dynamic sideways fallbacks and don't go to a fixed bundle
246        if(i==0) {
247            return; // initial slash ('/')
248        }
249
250        // ignore the intra-bundle path starting from the first slash ('/')
251        length=i;
252    } else /* URES_STRING */ {
253        // the whole string should only consist of a locale ID
254        if(i!=length) {
255            fprintf(stderr, "icupkg/ures_enumDependencies(%s res=%08x) %%ALIAS contains a '/'\n",
256                            itemName, res);
257            *pErrorCode=U_UNSUPPORTED_ERROR;
258            return;
259        }
260    }
261
262    // convert the Unicode string to char *
263    char localeID[32];
264    if(length>=(int32_t)sizeof(localeID)) {
265        fprintf(stderr, "icupkg/ures_enumDependencies(%s res=%08x) alias locale ID length %ld too long\n",
266                        itemName, res, (long)length);
267        *pErrorCode=U_BUFFER_OVERFLOW_ERROR;
268        return;
269    }
270    u_UCharsToChars(alias, localeID, length);
271    localeID[length]=0;
272
273    checkIDSuffix(itemName, localeID, -1, (useResSuffix ? ".res" : ""), check, context, pErrorCode);
274}
275
276/*
277 * Enumerate one resource item and its children and extract dependencies from
278 * aliases.
279 */
280static void
281ures_enumDependencies(const char *itemName,
282                      const ResourceData *pResData,
283                      Resource res, const char *inKey, const char *parentKey, int32_t depth,
284                      CheckDependency check, void *context,
285                      Package *pkg,
286                      UErrorCode *pErrorCode) {
287    switch(res_getPublicType(res)) {
288    case URES_STRING:
289        {
290            UBool useResSuffix = TRUE;
291            // Check for %%ALIAS
292            if(depth==1 && inKey!=NULL) {
293                if(0!=strcmp(inKey, "%%ALIAS")) {
294                    break;
295                }
296            }
297            // Check for %%DEPENDENCY
298            else if(depth==2 && parentKey!=NULL) {
299                if(0!=strcmp(parentKey, "%%DEPENDENCY")) {
300                    break;
301                }
302                useResSuffix = FALSE;
303            } else {
304                // we ignore all other strings
305                break;
306            }
307            int32_t length;
308            const UChar *alias=res_getString(pResData, res, &length);
309            checkAlias(itemName, res, alias, length, useResSuffix, check, context, pErrorCode);
310        }
311        break;
312    case URES_ALIAS:
313        {
314            int32_t length;
315            const UChar *alias=res_getAlias(pResData, res, &length);
316            checkAlias(itemName, res, alias, length, TRUE, check, context, pErrorCode);
317        }
318        break;
319    case URES_TABLE:
320        {
321            /* recurse */
322            int32_t count=res_countArrayItems(pResData, res);
323            for(int32_t i=0; i<count; ++i) {
324                const char *itemKey;
325                Resource item=res_getTableItemByIndex(pResData, res, i, &itemKey);
326                ures_enumDependencies(
327                        itemName, pResData,
328                        item, itemKey,
329                        inKey, depth+1,
330                        check, context,
331                        pkg,
332                        pErrorCode);
333                if(U_FAILURE(*pErrorCode)) {
334                    fprintf(stderr, "icupkg/ures_enumDependencies(%s table res=%08x)[%d].recurse(%s: %08x) failed\n",
335                                    itemName, res, i, itemKey, item);
336                    break;
337                }
338            }
339        }
340        break;
341    case URES_ARRAY:
342        {
343            /* recurse */
344            int32_t count=res_countArrayItems(pResData, res);
345            for(int32_t i=0; i<count; ++i) {
346                Resource item=res_getArrayItem(pResData, res, i);
347                ures_enumDependencies(
348                        itemName, pResData,
349                        item, NULL,
350                        inKey, depth+1,
351                        check, context,
352                        pkg,
353                        pErrorCode);
354                if(U_FAILURE(*pErrorCode)) {
355                    fprintf(stderr, "icupkg/ures_enumDependencies(%s array res=%08x)[%d].recurse(%08x) failed\n",
356                                    itemName, res, i, item);
357                    break;
358                }
359            }
360        }
361        break;
362    default:
363        break;
364    }
365}
366
367static void
368ures_enumDependencies(const char *itemName, const UDataInfo *pInfo,
369                      const uint8_t *inBytes, int32_t length,
370                      CheckDependency check, void *context,
371                      Package *pkg,
372                      UErrorCode *pErrorCode) {
373    ResourceData resData;
374
375    res_read(&resData, pInfo, inBytes, length, pErrorCode);
376    if(U_FAILURE(*pErrorCode)) {
377        fprintf(stderr, "icupkg: .res format version %02x.%02x not supported, or bundle malformed\n",
378                        pInfo->formatVersion[0], pInfo->formatVersion[1]);
379        exit(U_UNSUPPORTED_ERROR);
380    }
381
382    /*
383     * if the bundle attributes are present and the nofallback flag is not set,
384     * then add the parent bundle as a dependency
385     */
386    if(pInfo->formatVersion[0]>1 || (pInfo->formatVersion[0]==1 && pInfo->formatVersion[1]>=1)) {
387        if(!resData.noFallback) {
388            /* this bundle participates in locale fallback */
389            checkParent(itemName, check, context, pErrorCode);
390        }
391    }
392
393    icu::NativeItem nativePool;
394
395    if(resData.usesPoolBundle) {
396        char poolName[200];
397        makeTargetName(itemName, "pool", 4, ".res", poolName, (int32_t)sizeof(poolName), pErrorCode);
398        if(U_FAILURE(*pErrorCode)) {
399            return;
400        }
401        check(context, itemName, poolName);
402        int32_t index=pkg->findItem(poolName);
403        if(index<0) {
404            // We cannot work with a bundle if its pool resource is missing.
405            // check() already printed a complaint.
406            return;
407        }
408        // TODO: Cache the native version in the Item itself.
409        nativePool.setItem(pkg->getItem(index), ures_swap);
410        const UDataInfo *poolInfo=nativePool.getDataInfo();
411        if(poolInfo->formatVersion[0]<=1) {
412            fprintf(stderr, "icupkg: %s is not a pool bundle\n", poolName);
413            return;
414        }
415        const int32_t *poolIndexes=(const int32_t *)nativePool.getBytes()+1;
416        int32_t poolIndexLength=poolIndexes[URES_INDEX_LENGTH]&0xff;
417        if(!(poolIndexLength>URES_INDEX_POOL_CHECKSUM &&
418             (poolIndexes[URES_INDEX_ATTRIBUTES]&URES_ATT_IS_POOL_BUNDLE))
419        ) {
420            fprintf(stderr, "icupkg: %s is not a pool bundle\n", poolName);
421            return;
422        }
423        if(resData.pRoot[1+URES_INDEX_POOL_CHECKSUM]==poolIndexes[URES_INDEX_POOL_CHECKSUM]) {
424            resData.poolBundleKeys=(const char *)(poolIndexes+poolIndexLength);
425        } else {
426            fprintf(stderr, "icupkg: %s has mismatched checksum for %s\n", poolName, itemName);
427            return;
428        }
429    }
430
431    ures_enumDependencies(
432        itemName, &resData,
433        resData.rootRes, NULL, NULL, 0,
434        check, context,
435        pkg,
436        pErrorCode);
437}
438
439// get dependencies from conversion tables --------------------------------- ***
440
441/* code adapted from ucnv_swap() */
442static void
443ucnv_enumDependencies(const UDataSwapper *ds,
444                      const char *itemName, const UDataInfo *pInfo,
445                      const uint8_t *inBytes, int32_t length,
446                      CheckDependency check, void *context,
447                      UErrorCode *pErrorCode) {
448    uint32_t staticDataSize;
449
450    const UConverterStaticData *inStaticData;
451
452    const _MBCSHeader *inMBCSHeader;
453    uint8_t outputType;
454
455    /* check format version */
456    if(!(
457        pInfo->formatVersion[0]==6 &&
458        pInfo->formatVersion[1]>=2
459    )) {
460        fprintf(stderr, "icupkg/ucnv_enumDependencies(): .cnv format version %02x.%02x not supported\n",
461                        pInfo->formatVersion[0], pInfo->formatVersion[1]);
462        exit(U_UNSUPPORTED_ERROR);
463    }
464
465    /* read the initial UConverterStaticData structure after the UDataInfo header */
466    inStaticData=(const UConverterStaticData *)inBytes;
467
468    if( length<(int32_t)sizeof(UConverterStaticData) ||
469        (uint32_t)length<(staticDataSize=ds->readUInt32(inStaticData->structSize))
470    ) {
471        udata_printError(ds, "icupkg/ucnv_enumDependencies(): too few bytes (%d after header) for an ICU .cnv conversion table\n",
472                            length);
473        *pErrorCode=U_INDEX_OUTOFBOUNDS_ERROR;
474        return;
475    }
476
477    inBytes+=staticDataSize;
478    length-=(int32_t)staticDataSize;
479
480    /* check for supported conversionType values */
481    if(inStaticData->conversionType==UCNV_MBCS) {
482        /* MBCS data */
483        uint32_t mbcsHeaderLength, mbcsHeaderFlags, mbcsHeaderOptions;
484        int32_t extOffset;
485
486        inMBCSHeader=(const _MBCSHeader *)inBytes;
487
488        if(length<(int32_t)sizeof(_MBCSHeader)) {
489            udata_printError(ds, "icupkg/ucnv_enumDependencies(): too few bytes (%d after headers) for an ICU MBCS .cnv conversion table\n",
490                                length);
491            *pErrorCode=U_INDEX_OUTOFBOUNDS_ERROR;
492            return;
493        }
494        if(inMBCSHeader->version[0]==4 && inMBCSHeader->version[1]>=1) {
495            mbcsHeaderLength=MBCS_HEADER_V4_LENGTH;
496        } else if(inMBCSHeader->version[0]==5 && inMBCSHeader->version[1]>=3 &&
497                  ((mbcsHeaderOptions=ds->readUInt32(inMBCSHeader->options))&
498                   MBCS_OPT_UNKNOWN_INCOMPATIBLE_MASK)==0
499        ) {
500            mbcsHeaderLength=mbcsHeaderOptions&MBCS_OPT_LENGTH_MASK;
501        } else {
502            udata_printError(ds, "icupkg/ucnv_enumDependencies(): unsupported _MBCSHeader.version %d.%d\n",
503                             inMBCSHeader->version[0], inMBCSHeader->version[1]);
504            *pErrorCode=U_UNSUPPORTED_ERROR;
505            return;
506        }
507
508        mbcsHeaderFlags=ds->readUInt32(inMBCSHeader->flags);
509        extOffset=(int32_t)(mbcsHeaderFlags>>8);
510        outputType=(uint8_t)mbcsHeaderFlags;
511
512        if(outputType==MBCS_OUTPUT_EXT_ONLY) {
513            /*
514             * extension-only file,
515             * contains a base name instead of normal base table data
516             */
517            char baseName[32];
518            int32_t baseNameLength;
519
520            /* there is extension data after the base data, see ucnv_ext.h */
521            if(length<(extOffset+UCNV_EXT_INDEXES_MIN_LENGTH*4)) {
522                udata_printError(ds, "icupkg/ucnv_enumDependencies(): too few bytes (%d after headers) for an ICU MBCS .cnv conversion table with extension data\n",
523                                 length);
524                *pErrorCode=U_INDEX_OUTOFBOUNDS_ERROR;
525                return;
526            }
527
528            /* swap the base name, between the header and the extension data */
529            const char *inBaseName=(const char *)inBytes+mbcsHeaderLength*4;
530            baseNameLength=(int32_t)strlen(inBaseName);
531            if(baseNameLength>=(int32_t)sizeof(baseName)) {
532                udata_printError(ds, "icupkg/ucnv_enumDependencies(%s): base name length %ld too long\n",
533                                 itemName, baseNameLength);
534                *pErrorCode=U_UNSUPPORTED_ERROR;
535                return;
536            }
537            ds->swapInvChars(ds, inBaseName, baseNameLength+1, baseName, pErrorCode);
538
539            checkIDSuffix(itemName, baseName, -1, ".cnv", check, context, pErrorCode);
540        }
541    }
542}
543
544// ICU data formats -------------------------------------------------------- ***
545
/*
 * Indexes of the known data formats.
 * The order of these constants must match the order of the entries in
 * dataFormats[]; FMT_COUNT is the number of known formats.
 */
enum {
    FMT_RES,
    FMT_CNV,
    FMT_ALIAS,
    FMT_COUNT
};

/* dataFormat signatures as they appear in UDataInfo.dataFormat, indexed by FMT_... */
static const struct {
    uint8_t dataFormat[4];
} dataFormats[]={
    { { 0x52, 0x65, 0x73, 0x42 } },     /* FMT_RES:   dataFormat="ResB" */
    { { 0x63, 0x6e, 0x76, 0x74 } },     /* FMT_CNV:   dataFormat="cnvt" */
    { { 0x43, 0x76, 0x41, 0x6c } }      /* FMT_ALIAS: dataFormat="CvAl" */
};

/*
 * Look up a 4-byte dataFormat signature.
 * Returns the matching FMT_... index, or -1 if the format is not known.
 */
static int32_t
getDataFormat(const uint8_t dataFormat[4]) {
    int32_t format=0;

    while(format<FMT_COUNT) {
        if(memcmp(dataFormat, dataFormats[format].dataFormat, 4)==0) {
            return format;
        }
        ++format;
    }
    return -1;
}
572
573// enumerate dependencies of a package item -------------------------------- ***
574
575void
576Package::enumDependencies(Item *pItem, void *context, CheckDependency check) {
577    int32_t infoLength, itemHeaderLength;
578    UErrorCode errorCode=U_ZERO_ERROR;
579    const UDataInfo *pInfo=getDataInfo(pItem->data, pItem->length, infoLength, itemHeaderLength, &errorCode);
580    if(U_FAILURE(errorCode)) {
581        return; // should not occur because readFile() checks headers
582    }
583
584    // find the data format and call the corresponding function, if any
585    int32_t format=getDataFormat(pInfo->dataFormat);
586    if(format>=0) {
587        switch(format) {
588        case FMT_RES:
589            {
590                /*
591                 * Swap the resource bundle (if necessary) so that we can use
592                 * the normal runtime uresdata.c code to read it.
593                 * We do not want to duplicate that code, especially not together with on-the-fly swapping.
594                 */
595                NativeItem nrb(pItem, ures_swap);
596                ures_enumDependencies(pItem->name, nrb.getDataInfo(), nrb.getBytes(), nrb.getLength(), check, context, this, &errorCode);
597                break;
598            }
599        case FMT_CNV:
600            {
601                // TODO: share/cache swappers
602                UDataSwapper *ds=udata_openSwapper(
603                                    (UBool)pInfo->isBigEndian, pInfo->charsetFamily,
604                                    U_IS_BIG_ENDIAN, U_CHARSET_FAMILY,
605                                    &errorCode);
606                if(U_FAILURE(errorCode)) {
607                    fprintf(stderr, "icupkg: udata_openSwapper(\"%s\") failed - %s\n",
608                            pItem->name, u_errorName(errorCode));
609                    exit(errorCode);
610                }
611
612                ds->printError=printError;
613                ds->printErrorContext=stderr;
614
615                const uint8_t *inBytes=pItem->data+itemHeaderLength;
616                int32_t length=pItem->length-itemHeaderLength;
617
618                ucnv_enumDependencies(ds, pItem->name, pInfo, inBytes, length, check, context, &errorCode);
619                udata_closeSwapper(ds);
620                break;
621            }
622        default:
623            break;
624        }
625
626        if(U_FAILURE(errorCode)) {
627            exit(errorCode);
628        }
629    }
630}
631
632U_NAMESPACE_END
633