1/*
2*******************************************************************************
3*
4*   Copyright (C) 2003-2008, International Business Machines
5*   Corporation and others.  All Rights Reserved.
6*
7*******************************************************************************
8*   file name:  pkgitems.cpp
9*   encoding:   US-ASCII
10*   tab size:   8 (not used)
11*   indentation:4
12*
13*   created on: 2005sep18
14*   created by: Markus W. Scherer
15*
16*   Companion file to package.cpp. Deals with details of ICU data item formats.
17*   Used for item dependencies.
18*   Contains adapted code from uresdata.c and ucnv_bld.c (swapper code from 2003).
19*/
20
21#include "unicode/utypes.h"
22#include "unicode/ures.h"
23#include "unicode/putil.h"
24#include "unicode/udata.h"
25#include "cstring.h"
26#include "ucmndata.h"
27#include "udataswp.h"
28#include "swapimpl.h"
29#include "toolutil.h"
30#include "package.h"
31#include "pkg_imp.h"
32
33#include <stdio.h>
34#include <stdlib.h>
35#include <string.h>
36
37/* item formats in common */
38
39#include "uresdata.h"
40#include "ucnv_bld.h"
41#include "ucnv_io.h"
42
43// general definitions ----------------------------------------------------- ***
44
/*
 * Number of elements in a real array (not a pointer), as an int32_t.
 * The whole expansion is parenthesized so that the cast binds correctly
 * in any surrounding expression (e.g. after sizeof or before a postfix operator).
 */
#define LENGTHOF(array) ((int32_t)(sizeof(array)/sizeof((array)[0])))
46
47U_CDECL_BEGIN
48
49static void U_CALLCONV
50printError(void *context, const char *fmt, va_list args) {
51    vfprintf((FILE *)context, fmt, args);
52}
53
54U_CDECL_END
55
56// check a dependency ------------------------------------------------------ ***
57
58/*
59 * assemble the target item name from the source item name, an ID
60 * and a suffix
61 */
62static void
63checkIDSuffix(const char *itemName, const char *id, int32_t idLength, const char *suffix,
64              CheckDependency check, void *context,
65              UErrorCode *pErrorCode) {
66    char target[200];
67    const char *itemID;
68    int32_t treeLength, suffixLength, targetLength;
69
70    // get the item basename
71    itemID=strrchr(itemName, '/');
72    if(itemID!=NULL) {
73        ++itemID;
74    } else {
75        itemID=itemName;
76    }
77
78    // build the target string
79    treeLength=(int32_t)(itemID-itemName);
80    if(idLength<0) {
81        idLength=(int32_t)strlen(id);
82    }
83    suffixLength=(int32_t)strlen(suffix);
84    targetLength=treeLength+idLength+suffixLength;
85    if(targetLength>=(int32_t)sizeof(target)) {
86        fprintf(stderr, "icupkg/checkIDSuffix(%s) alias target item name length %ld too long\n",
87                        itemName, (long)targetLength);
88        *pErrorCode=U_BUFFER_OVERFLOW_ERROR;
89        return;
90    }
91
92    memcpy(target, itemName, treeLength);
93    memcpy(target+treeLength, id, idLength);
94    memcpy(target+treeLength+idLength, suffix, suffixLength+1); // +1 includes the terminating NUL
95
96    check(context, itemName, target);
97}
98
99/* assemble the target item name from the item's parent item name */
100static void
101checkParent(const char *itemName, CheckDependency check, void *context,
102            UErrorCode *pErrorCode) {
103    const char *itemID, *parent, *parentLimit, *suffix;
104    int32_t parentLength;
105
106    // get the item basename
107    itemID=strrchr(itemName, '/');
108    if(itemID!=NULL) {
109        ++itemID;
110    } else {
111        itemID=itemName;
112    }
113
114    // get the item suffix
115    suffix=strrchr(itemID, '.');
116    if(suffix==NULL) {
117        // empty suffix, point to the end of the string
118        suffix=strrchr(itemID, 0);
119    }
120
121    // get the position of the last '_'
122    for(parentLimit=suffix; parentLimit>itemID && *--parentLimit!='_';) {}
123
124    if(parentLimit!=itemID) {
125        // get the parent item name by truncating the last part of this item's name */
126        parent=itemID;
127        parentLength=(int32_t)(parentLimit-itemID);
128    } else {
129        // no '_' in the item name: the parent is the root bundle
130        parent="root";
131        parentLength=4;
132        if((suffix-itemID)==parentLength && 0==memcmp(itemID, parent, parentLength)) {
133            // the item itself is "root", which does not depend on a parent
134            return;
135        }
136    }
137    checkIDSuffix(itemName, parent, parentLength, suffix, check, context, pErrorCode);
138}
139
140// get dependencies from resource bundles ---------------------------------- ***
141
/* table keys whose string values name other bundles that this bundle depends on */
static const char gAliasKey[]="%%ALIAS";
static const char gDependencyKey[]="%%DEPENDENCY";

/* key lengths without the terminating NUL, derived from the key strings themselves */
enum {
    gAliasKeyLength=(int32_t)sizeof(gAliasKey)-1,
    gDependencyKeyLength=(int32_t)sizeof(gDependencyKey)-1
};
145
146/*
147 * Enumerate one resource item and its children and extract dependencies from
148 * aliases.
149 * Code adapted from ures_preflightResource() and ures_swapResource().
150 */
151static void
152ures_enumDependencies(const UDataSwapper *ds,
153                      const char *itemName,
154                      const Resource *inBundle, int32_t length,
155                      Resource res, const char *inKey, const char *parentKey, int32_t depth,
156                      CheckDependency check, void *context,
157                      UErrorCode *pErrorCode) {
158    const Resource *p;
159    int32_t offset;
160    UBool useResSuffix = TRUE;
161
162    if(res==0 || RES_GET_TYPE(res)==URES_INT) {
163        /* empty string or integer, nothing to do */
164        return;
165    }
166
167    /* all other types use an offset to point to their data */
168    offset=(int32_t)RES_GET_OFFSET(res);
169    if(0<=length && length<=offset) {
170        udata_printError(ds, "icupkg/ures_enumDependencies(%s res=%08x) resource offset exceeds bundle length %d\n",
171                         itemName, res, length);
172        *pErrorCode=U_INDEX_OUTOFBOUNDS_ERROR;
173        return;
174    }
175    p=inBundle+offset;
176
177    switch(RES_GET_TYPE(res)) {
178        /* strings and aliases have physically the same value layout */
179    case URES_STRING:
180        // Check for %%ALIAS
181        if(depth==1 && inKey!=NULL) {
182            char key[gAliasKeyLength+1];
183            int32_t keyLength;
184
185            keyLength=(int32_t)strlen(inKey);
186            if(keyLength!=gAliasKeyLength) {
187                break;
188            }
189            ds->swapInvChars(ds, inKey, gAliasKeyLength+1, key, pErrorCode);
190            if(U_FAILURE(*pErrorCode)) {
191                udata_printError(ds, "icupkg/ures_enumDependencies(%s res=%08x) string key contains variant characters\n",
192                                itemName, res);
193                return;
194            }
195            if(0!=strcmp(key, gAliasKey)) {
196                break;
197            }
198        }
199        // Check for %%DEPENDENCY
200        else if(depth==2 && parentKey!=NULL) {
201            char key[gDependencyKeyLength+1];
202            int32_t keyLength;
203
204            keyLength=(int32_t)strlen(parentKey);
205            if(keyLength!=gDependencyKeyLength) {
206                break;
207            }
208            ds->swapInvChars(ds, parentKey, gDependencyKeyLength+1, key, pErrorCode);
209            if(U_FAILURE(*pErrorCode)) {
210                udata_printError(ds, "icupkg/ures_enumDependencies(%s res=%08x) string key contains variant characters\n",
211                                itemName, res);
212                return;
213            }
214            if(0!=strcmp(key, gDependencyKey)) {
215                break;
216            }
217            useResSuffix = FALSE;
218        } else {
219            // we ignore all other strings
220            break;
221        }
222        // for the top-level %%ALIAS or %%DEPENDENCY string fall through to URES_ALIAS
223    case URES_ALIAS:
224        {
225            char localeID[32];
226            const uint16_t *p16;
227            int32_t i, stringLength;
228            uint16_t u16, ored16;
229
230            stringLength=udata_readInt32(ds, (int32_t)*p);
231
232            /* top=offset+1+(string length +1)/2 rounded up */
233            offset+=1+((stringLength+1)+1)/2;
234            if(offset>length) {
235                break; // the resource does not fit into the bundle, print error below
236            }
237
238            // extract the locale ID from alias strings like
239            // locale_ID/key1/key2/key3
240            // locale_ID
241            if(U_IS_BIG_ENDIAN==ds->inIsBigEndian) {
242                u16=0x2f;   // slash in local endianness
243            } else {
244                u16=0x2f00; // slash in opposite endianness
245            }
246            p16=(const uint16_t *)(p+1); // Unicode string contents
247
248            // search for the first slash
249            for(i=0; i<stringLength && p16[i]!=u16; ++i) {}
250
251            if(RES_GET_TYPE(res)==URES_ALIAS) {
252                // ignore aliases with an initial slash:
253                // /ICUDATA/... and /pkgname/... go to a different package
254                // /LOCALE/... are for dynamic sideways fallbacks and don't go to a fixed bundle
255                if(i==0) {
256                    break; // initial slash ('/')
257                }
258
259                // ignore the intra-bundle path starting from the first slash ('/')
260                stringLength=i;
261            } else /* URES_STRING */ {
262                // the whole string should only consist of a locale ID
263                if(i!=stringLength) {
264                    udata_printError(ds, "icupkg/ures_enumDependencies(%s res=%08x) %%ALIAS contains a '/'\n",
265                                    itemName, res);
266                    *pErrorCode=U_UNSUPPORTED_ERROR;
267                    return;
268                }
269            }
270
271            // convert the Unicode string to char * and
272            // check that it has a bundle path but no package
273            if(stringLength>=(int32_t)sizeof(localeID)) {
274                udata_printError(ds, "icupkg/ures_enumDependencies(%s res=%08x) alias locale ID length %ld too long\n",
275                                itemName, res, stringLength);
276                *pErrorCode=U_BUFFER_OVERFLOW_ERROR;
277                return;
278            }
279
280            // convert the alias Unicode string to US-ASCII
281            ored16=0;
282            if(U_IS_BIG_ENDIAN==ds->inIsBigEndian) {
283                for(i=0; i<stringLength; ++i) {
284                    u16=p16[i];
285                    ored16|=u16;
286                    localeID[i]=(char)u16;
287                }
288            } else {
289                for(i=0; i<stringLength; ++i) {
290                    u16=p16[i];
291                    ored16|=u16;
292                    localeID[i]=(char)(u16>>8);
293                }
294                ored16=(uint16_t)((ored16<<8)|(ored16>>8));
295            }
296            localeID[stringLength]=0;
297            if(ored16>0x7f) {
298                udata_printError(ds, "icupkg/ures_enumDependencies(%s res=%08x) alias string contains non-ASCII characters\n",
299                                itemName, res);
300                *pErrorCode=U_INVALID_CHAR_FOUND;
301                return;
302            }
303
304#if (U_CHARSET_FAMILY==U_EBCDIC_FAMILY)
305            // swap to EBCDIC
306            // our swapper is probably not the right one, but
307            // the function uses it only for printing errors
308            uprv_ebcdicFromAscii(ds, localeID, stringLength, localeID, pErrorCode);
309            if(U_FAILURE(*pErrorCode)) {
310                return;
311            }
312#endif
313#if U_CHARSET_FAMILY!=U_ASCII_FAMILY && U_CHARSET_FAMILY!=U_EBCDIC_FAMILY
314#           error Unknown U_CHARSET_FAMILY value!
315#endif
316
317            checkIDSuffix(itemName, localeID, -1, (useResSuffix ? ".res" : ""), check, context, pErrorCode);
318        }
319        break;
320    case URES_TABLE:
321    case URES_TABLE32:
322        {
323            const uint16_t *pKey16;
324            const int32_t *pKey32;
325
326            Resource item;
327            int32_t i, count;
328
329            if(RES_GET_TYPE(res)==URES_TABLE) {
330                /* get table item count */
331                pKey16=(const uint16_t *)p;
332                count=ds->readUInt16(*pKey16++);
333
334                pKey32=NULL;
335
336                /* top=((1+ table item count)/2 rounded up)+(table item count) */
337                offset+=((1+count)+1)/2;
338            } else {
339                /* get table item count */
340                pKey32=(const int32_t *)p;
341                count=udata_readInt32(ds, *pKey32++);
342
343                pKey16=NULL;
344
345                /* top=(1+ table item count)+(table item count) */
346                offset+=1+count;
347            }
348
349            p=inBundle+offset; /* pointer to table resources */
350            offset+=count;
351
352            if(offset>length) {
353                break; // the resource does not fit into the bundle, print error below
354            }
355
356            /* recurse */
357            for(i=0; i<count; ++i) {
358                item=ds->readUInt32(*p++);
359                ures_enumDependencies(
360                        ds, itemName, inBundle, length, item,
361                        ((const char *)inBundle)+
362                            (pKey16!=NULL ?
363                                ds->readUInt16(pKey16[i]) :
364                                udata_readInt32(ds, pKey32[i])),
365                        inKey, depth+1,
366                        check, context,
367                        pErrorCode);
368                if(U_FAILURE(*pErrorCode)) {
369                    udata_printError(ds, "icupkg/ures_enumDependencies(%s table res=%08x)[%d].recurse(%08x) failed\n",
370                                        itemName, res, i, item);
371                    break;
372                }
373            }
374        }
375        break;
376    case URES_ARRAY:
377        {
378            Resource item;
379            int32_t i, count;
380
381            /* top=offset+1+(array length) */
382            count=udata_readInt32(ds, (int32_t)*p++);
383            offset+=1+count;
384
385            if(offset>length) {
386                break; // the resource does not fit into the bundle, print error below
387            }
388
389            /* recurse */
390            for(i=0; i<count; ++i) {
391                item=ds->readUInt32(*p++);
392                ures_enumDependencies(
393                        ds, itemName, inBundle, length,
394                        item, NULL, inKey, depth+1,
395                        check, context,
396                        pErrorCode);
397                if(U_FAILURE(*pErrorCode)) {
398                    udata_printError(ds, "icupkg/ures_enumDependencies(%s array res=%08x)[%d].recurse(%08x) failed\n",
399                                        itemName, res, i, item);
400                    break;
401                }
402            }
403        }
404        break;
405    default:
406        break;
407    }
408
409    if(U_FAILURE(*pErrorCode)) {
410        /* nothing to do */
411    } else if(0<=length && length<offset) {
412        udata_printError(ds, "icupkg/ures_enumDependencies(%s res=%08x) resource limit exceeds bundle length %d\n",
413                         itemName, res, length);
414        *pErrorCode=U_INDEX_OUTOFBOUNDS_ERROR;
415    }
416}
417
418/* code adapted from ures_swap() */
419static void
420ures_enumDependencies(const UDataSwapper *ds,
421                      const char *itemName, const UDataInfo *pInfo,
422                      const uint8_t *inBytes, int32_t length,
423                      CheckDependency check, void *context,
424                      UErrorCode *pErrorCode) {
425    const Resource *inBundle;
426    Resource rootRes;
427
428    /* the following integers count Resource item offsets (4 bytes each), not bytes */
429    int32_t bundleLength;
430
431    /* check format version */
432    if(pInfo->formatVersion[0]!=1) {
433        fprintf(stderr, "icupkg: .res format version %02x not supported\n",
434                        pInfo->formatVersion[0]);
435        exit(U_UNSUPPORTED_ERROR);
436    }
437
438    /* a resource bundle must contain at least one resource item */
439    bundleLength=length/4;
440
441    /* formatVersion 1.1 must have a root item and at least 5 indexes */
442    if( bundleLength<
443            (pInfo->formatVersion[1]==0 ? 1 : 1+5)
444    ) {
445        fprintf(stderr, "icupkg: too few bytes (%d after header) for a resource bundle\n",
446                        length);
447        exit(U_INDEX_OUTOFBOUNDS_ERROR);
448    }
449
450    inBundle=(const Resource *)inBytes;
451    rootRes=ds->readUInt32(*inBundle);
452
453    ures_enumDependencies(
454        ds, itemName, inBundle, bundleLength,
455        rootRes, NULL, NULL, 0,
456        check, context,
457        pErrorCode);
458
459    /*
460     * if the bundle attributes are present and the nofallback flag is not set,
461     * then add the parent bundle as a dependency
462     */
463    if(pInfo->formatVersion[1]>=1) {
464        int32_t indexes[URES_INDEX_TOP];
465        const int32_t *inIndexes;
466
467        inIndexes=(const int32_t *)inBundle+1;
468        indexes[URES_INDEX_LENGTH]=udata_readInt32(ds, inIndexes[URES_INDEX_LENGTH]);
469        if(indexes[URES_INDEX_LENGTH]>URES_INDEX_ATTRIBUTES) {
470            indexes[URES_INDEX_ATTRIBUTES]=udata_readInt32(ds, inIndexes[URES_INDEX_ATTRIBUTES]);
471            if(0==(indexes[URES_INDEX_ATTRIBUTES]&URES_ATT_NO_FALLBACK)) {
472                /* this bundle participates in locale fallback */
473                checkParent(itemName, check, context, pErrorCode);
474            }
475        }
476    }
477}
478
479// get dependencies from conversion tables --------------------------------- ***
480
481/* code adapted from ucnv_swap() */
482static void
483ucnv_enumDependencies(const UDataSwapper *ds,
484                      const char *itemName, const UDataInfo *pInfo,
485                      const uint8_t *inBytes, int32_t length,
486                      CheckDependency check, void *context,
487                      UErrorCode *pErrorCode) {
488    uint32_t staticDataSize;
489
490    const UConverterStaticData *inStaticData;
491
492    const _MBCSHeader *inMBCSHeader;
493    uint8_t outputType;
494
495    /* check format version */
496    if(!(
497        pInfo->formatVersion[0]==6 &&
498        pInfo->formatVersion[1]>=2
499    )) {
500        fprintf(stderr, "icupkg/ucnv_enumDependencies(): .cnv format version %02x.%02x not supported\n",
501                        pInfo->formatVersion[0], pInfo->formatVersion[1]);
502        exit(U_UNSUPPORTED_ERROR);
503    }
504
505    /* read the initial UConverterStaticData structure after the UDataInfo header */
506    inStaticData=(const UConverterStaticData *)inBytes;
507
508    if( length<(int32_t)sizeof(UConverterStaticData) ||
509        (uint32_t)length<(staticDataSize=ds->readUInt32(inStaticData->structSize))
510    ) {
511        udata_printError(ds, "icupkg/ucnv_enumDependencies(): too few bytes (%d after header) for an ICU .cnv conversion table\n",
512                            length);
513        *pErrorCode=U_INDEX_OUTOFBOUNDS_ERROR;
514        return;
515    }
516
517    inBytes+=staticDataSize;
518    length-=(int32_t)staticDataSize;
519
520    /* check for supported conversionType values */
521    if(inStaticData->conversionType==UCNV_MBCS) {
522        /* MBCS data */
523        uint32_t mbcsHeaderLength, mbcsHeaderFlags, mbcsHeaderOptions;
524        int32_t extOffset;
525
526        inMBCSHeader=(const _MBCSHeader *)inBytes;
527
528        if(length<(int32_t)sizeof(_MBCSHeader)) {
529            udata_printError(ds, "icupkg/ucnv_enumDependencies(): too few bytes (%d after headers) for an ICU MBCS .cnv conversion table\n",
530                                length);
531            *pErrorCode=U_INDEX_OUTOFBOUNDS_ERROR;
532            return;
533        }
534        if(inMBCSHeader->version[0]==4 && inMBCSHeader->version[1]>=1) {
535            mbcsHeaderLength=MBCS_HEADER_V4_LENGTH;
536        } else if(inMBCSHeader->version[0]==5 && inMBCSHeader->version[1]>=3 &&
537                  ((mbcsHeaderOptions=ds->readUInt32(inMBCSHeader->options))&
538                   MBCS_OPT_UNKNOWN_INCOMPATIBLE_MASK)==0
539        ) {
540            mbcsHeaderLength=mbcsHeaderOptions&MBCS_OPT_LENGTH_MASK;
541        } else {
542            udata_printError(ds, "icupkg/ucnv_enumDependencies(): unsupported _MBCSHeader.version %d.%d\n",
543                             inMBCSHeader->version[0], inMBCSHeader->version[1]);
544            *pErrorCode=U_UNSUPPORTED_ERROR;
545            return;
546        }
547
548        mbcsHeaderFlags=ds->readUInt32(inMBCSHeader->flags);
549        extOffset=(int32_t)(mbcsHeaderFlags>>8);
550        outputType=(uint8_t)mbcsHeaderFlags;
551
552        if(outputType==MBCS_OUTPUT_EXT_ONLY) {
553            /*
554             * extension-only file,
555             * contains a base name instead of normal base table data
556             */
557            char baseName[32];
558            int32_t baseNameLength;
559
560            /* there is extension data after the base data, see ucnv_ext.h */
561            if(length<(extOffset+UCNV_EXT_INDEXES_MIN_LENGTH*4)) {
562                udata_printError(ds, "icupkg/ucnv_enumDependencies(): too few bytes (%d after headers) for an ICU MBCS .cnv conversion table with extension data\n",
563                                 length);
564                *pErrorCode=U_INDEX_OUTOFBOUNDS_ERROR;
565                return;
566            }
567
568            /* swap the base name, between the header and the extension data */
569            const char *inBaseName=(const char *)inBytes+mbcsHeaderLength*4;
570            baseNameLength=(int32_t)strlen(inBaseName);
571            if(baseNameLength>=(int32_t)sizeof(baseName)) {
572                udata_printError(ds, "icupkg/ucnv_enumDependencies(%s): base name length %ld too long\n",
573                                 itemName, baseNameLength);
574                *pErrorCode=U_UNSUPPORTED_ERROR;
575                return;
576            }
577            ds->swapInvChars(ds, inBaseName, baseNameLength+1, baseName, pErrorCode);
578
579            checkIDSuffix(itemName, baseName, -1, ".cnv", check, context, pErrorCode);
580        }
581    }
582}
583
584// ICU data formats -------------------------------------------------------- ***
585
/*
 * Indexes of the known data formats.
 * The order must match the entries of dataFormats[] below.
 */
enum {
    FMT_RES,
    FMT_CNV,
    FMT_ALIAS,
    FMT_COUNT
};

/*
 * dataFormat signatures of the known data formats, indexed by FMT_ constant.
 * The bytes are spelled out as numbers so that they are ASCII values
 * independent of the compiler's charset.
 */
static const struct {
    uint8_t dataFormat[4];
} dataFormats[]={
    { { 0x52, 0x65, 0x73, 0x42 } },     /* FMT_RES:   dataFormat="ResB" */
    { { 0x63, 0x6e, 0x76, 0x74 } },     /* FMT_CNV:   dataFormat="cnvt" */
    { { 0x43, 0x76, 0x41, 0x6c } }      /* FMT_ALIAS: dataFormat="CvAl" */
};

/*
 * Look up a 4-byte dataFormat signature.
 * Returns the matching FMT_ constant, or -1 if the format is not known.
 */
static int32_t
getDataFormat(const uint8_t dataFormat[4]) {
    int32_t format=0;

    while(format<FMT_COUNT) {
        if(0==memcmp(dataFormats[format].dataFormat, dataFormat, 4)) {
            return format;
        }
        ++format;
    }
    return -1;
}
612
613// enumerate dependencies of a package item -------------------------------- ***
614
615U_NAMESPACE_BEGIN
616
617void
618Package::enumDependencies(Item *pItem, void *context, CheckDependency check) {
619    const UDataInfo *pInfo;
620    const uint8_t *inBytes;
621    int32_t format, length, infoLength, itemHeaderLength;
622    UErrorCode errorCode;
623
624    errorCode=U_ZERO_ERROR;
625    pInfo=getDataInfo(pItem->data,pItem->length, infoLength, itemHeaderLength, &errorCode);
626    if(U_FAILURE(errorCode)) {
627        return; // should not occur because readFile() checks headers
628    }
629
630    // find the data format and call the corresponding function, if any
631    format=getDataFormat(pInfo->dataFormat);
632    if(format>=0) {
633        UDataSwapper *ds;
634
635        // TODO: share/cache swappers
636        ds=udata_openSwapper((UBool)pInfo->isBigEndian, pInfo->charsetFamily, U_IS_BIG_ENDIAN, U_CHARSET_FAMILY, &errorCode);
637        if(U_FAILURE(errorCode)) {
638            fprintf(stderr, "icupkg: udata_openSwapper(\"%s\") failed - %s\n",
639                    pItem->name, u_errorName(errorCode));
640            exit(errorCode);
641        }
642
643        ds->printError=printError;
644        ds->printErrorContext=stderr;
645
646        inBytes=pItem->data+itemHeaderLength;
647        length=pItem->length-itemHeaderLength;
648
649        switch(format) {
650        case FMT_RES:
651            ures_enumDependencies(ds, pItem->name, pInfo, inBytes, length, check, context, &errorCode);
652            break;
653        case FMT_CNV:
654            ucnv_enumDependencies(ds, pItem->name, pInfo, inBytes, length, check, context, &errorCode);
655            break;
656        default:
657            break;
658        }
659
660        udata_closeSwapper(ds);
661
662        if(U_FAILURE(errorCode)) {
663            exit(errorCode);
664        }
665    }
666}
667U_NAMESPACE_END
668