pkgitems.cpp revision 51cfa1a9a96cad34675a6415fe86dfdf3f525bb6
1/*
2*******************************************************************************
3*
4*   Copyright (C) 2003-2007, International Business Machines
5*   Corporation and others.  All Rights Reserved.
6*
7*******************************************************************************
8*   file name:  pkgitems.cpp
9*   encoding:   US-ASCII
10*   tab size:   8 (not used)
11*   indentation:4
12*
13*   created on: 2005sep18
14*   created by: Markus W. Scherer
15*
16*   Companion file to package.cpp. Deals with details of ICU data item formats.
17*   Used for item dependencies.
18*   Contains adapted code from uresdata.c and ucnv_bld.c (swapper code from 2003).
19*/
20
21#include "unicode/utypes.h"
22#include "unicode/ures.h"
23#include "unicode/putil.h"
24#include "unicode/udata.h"
25#include "cstring.h"
26#include "ucmndata.h"
27#include "udataswp.h"
28#include "swapimpl.h"
29#include "toolutil.h"
30#include "package.h"
31#include "pkg_imp.h"
32
33#include <stdio.h>
34#include <stdlib.h>
35#include <string.h>
36
37/* item formats in common */
38
39#include "uresdata.h"
40#include "ucnv_bld.h"
41#include "ucnv_io.h"
42
43// general definitions ----------------------------------------------------- ***
44
/*
 * Number of elements of a true array (not a pointer), as an int32_t.
 * The whole expansion is parenthesized so that the cast cannot bind to
 * only part of a surrounding expression.
 */
#define LENGTHOF(array) ((int32_t)(sizeof(array)/sizeof((array)[0])))
46
47U_CDECL_BEGIN
48
49static void U_CALLCONV
50printError(void *context, const char *fmt, va_list args) {
51    vfprintf((FILE *)context, fmt, args);
52}
53
54U_CDECL_END
55
56// check a dependency ------------------------------------------------------ ***
57
58/*
59 * assemble the target item name from the source item name, an ID
60 * and a suffix
61 */
62static void
63checkIDSuffix(const char *itemName, const char *id, int32_t idLength, const char *suffix,
64              CheckDependency check, void *context,
65              UErrorCode *pErrorCode) {
66    char target[200];
67    const char *itemID;
68    int32_t treeLength, suffixLength, targetLength;
69
70    // get the item basename
71    itemID=strrchr(itemName, '/');
72    if(itemID!=NULL) {
73        ++itemID;
74    } else {
75        itemID=itemName;
76    }
77
78    // build the target string
79    treeLength=(int32_t)(itemID-itemName);
80    if(idLength<0) {
81        idLength=(int32_t)strlen(id);
82    }
83    suffixLength=(int32_t)strlen(suffix);
84    targetLength=treeLength+idLength+suffixLength;
85    if(targetLength>=(int32_t)sizeof(target)) {
86        fprintf(stderr, "icupkg/checkIDSuffix(%s) alias target item name length %ld too long\n",
87                        itemName, (long)targetLength);
88        *pErrorCode=U_BUFFER_OVERFLOW_ERROR;
89        return;
90    }
91
92    memcpy(target, itemName, treeLength);
93    memcpy(target+treeLength, id, idLength);
94    memcpy(target+treeLength+idLength, suffix, suffixLength+1); // +1 includes the terminating NUL
95
96    check(context, itemName, target);
97}
98
99/* assemble the target item name from the item's parent item name */
100static void
101checkParent(const char *itemName, CheckDependency check, void *context,
102            UErrorCode *pErrorCode) {
103    const char *itemID, *parent, *parentLimit, *suffix;
104    int32_t parentLength;
105
106    // get the item basename
107    itemID=strrchr(itemName, '/');
108    if(itemID!=NULL) {
109        ++itemID;
110    } else {
111        itemID=itemName;
112    }
113
114    // get the item suffix
115    suffix=strrchr(itemID, '.');
116    if(suffix==NULL) {
117        // empty suffix, point to the end of the string
118        suffix=strrchr(itemID, 0);
119    }
120
121    // get the position of the last '_'
122    for(parentLimit=suffix; parentLimit>itemID && *--parentLimit!='_';) {}
123
124    if(parentLimit!=itemID) {
125        // get the parent item name by truncating the last part of this item's name */
126        parent=itemID;
127        parentLength=(int32_t)(parentLimit-itemID);
128    } else {
129        // no '_' in the item name: the parent is the root bundle
130        parent="root";
131        parentLength=4;
132        if((suffix-itemID)==parentLength && 0==memcmp(itemID, parent, parentLength)) {
133            // the item itself is "root", which does not depend on a parent
134            return;
135        }
136    }
137    checkIDSuffix(itemName, parent, parentLength, suffix, check, context, pErrorCode);
138}
139
140// get dependencies from resource bundles ---------------------------------- ***
141
/*
 * Key of the top-level string resource whose value names another bundle
 * that this bundle depends on.
 */
static const char gAliasKey[]="%%ALIAS";
/* string length of gAliasKey, derived from the literal so the two cannot drift apart */
enum { gAliasKeyLength=(int)sizeof(gAliasKey)-1 };
144
145/*
146 * Enumerate one resource item and its children and extract dependencies from
147 * aliases.
148 * Code adapted from ures_preflightResource() and ures_swapResource().
149 */
150static void
151ures_enumDependencies(const UDataSwapper *ds,
152                      const char *itemName,
153                      const Resource *inBundle, int32_t length,
154                      Resource res, const char *inKey, int32_t depth,
155                      CheckDependency check, void *context,
156                      UErrorCode *pErrorCode) {
157    const Resource *p;
158    int32_t offset;
159
160    if(res==0 || RES_GET_TYPE(res)==URES_INT) {
161        /* empty string or integer, nothing to do */
162        return;
163    }
164
165    /* all other types use an offset to point to their data */
166    offset=(int32_t)RES_GET_OFFSET(res);
167    if(0<=length && length<=offset) {
168        udata_printError(ds, "icupkg/ures_enumDependencies(%s res=%08x) resource offset exceeds bundle length %d\n",
169                         itemName, res, length);
170        *pErrorCode=U_INDEX_OUTOFBOUNDS_ERROR;
171        return;
172    }
173    p=inBundle+offset;
174
175    switch(RES_GET_TYPE(res)) {
176        /* strings and aliases have physically the same value layout */
177    case URES_STRING:
178        // we ignore all strings except top-level strings with a %%ALIAS key
179        if(depth!=1) {
180            break;
181        } else {
182            char key[8];
183            int32_t keyLength;
184
185            keyLength=(int32_t)strlen(inKey);
186            if(keyLength!=gAliasKeyLength) {
187                break;
188            }
189            ds->swapInvChars(ds, inKey, gAliasKeyLength+1, key, pErrorCode);
190            if(U_FAILURE(*pErrorCode)) {
191                udata_printError(ds, "icupkg/ures_enumDependencies(%s res=%08x) string key contains variant characters\n",
192                                itemName, res);
193                return;
194            }
195            if(0!=strcmp(key, gAliasKey)) {
196                break;
197            }
198        }
199        // for the top-level %%ALIAS string fall through to URES_ALIAS
200    case URES_ALIAS:
201        {
202            char localeID[32];
203            const uint16_t *p16;
204            int32_t i, stringLength;
205            uint16_t u16, ored16;
206
207            stringLength=udata_readInt32(ds, (int32_t)*p);
208
209            /* top=offset+1+(string length +1)/2 rounded up */
210            offset+=1+((stringLength+1)+1)/2;
211            if(offset>length) {
212                break; // the resource does not fit into the bundle, print error below
213            }
214
215            // extract the locale ID from alias strings like
216            // locale_ID/key1/key2/key3
217            // locale_ID
218            if(U_IS_BIG_ENDIAN==ds->inIsBigEndian) {
219                u16=0x2f;   // slash in local endianness
220            } else {
221                u16=0x2f00; // slash in opposite endianness
222            }
223            p16=(const uint16_t *)(p+1); // Unicode string contents
224
225            // search for the first slash
226            for(i=0; i<stringLength && p16[i]!=u16; ++i) {}
227
228            if(RES_GET_TYPE(res)==URES_ALIAS) {
229                // ignore aliases with an initial slash:
230                // /ICUDATA/... and /pkgname/... go to a different package
231                // /LOCALE/... are for dynamic sideways fallbacks and don't go to a fixed bundle
232                if(i==0) {
233                    break; // initial slash ('/')
234                }
235
236                // ignore the intra-bundle path starting from the first slash ('/')
237                stringLength=i;
238            } else /* URES_STRING */ {
239                // the whole string should only consist of a locale ID
240                if(i!=stringLength) {
241                    udata_printError(ds, "icupkg/ures_enumDependencies(%s res=%08x) %%ALIAS contains a '/'\n",
242                                    itemName, res);
243                    *pErrorCode=U_UNSUPPORTED_ERROR;
244                    return;
245                }
246            }
247
248            // convert the Unicode string to char * and
249            // check that it has a bundle path but no package
250            if(stringLength>=(int32_t)sizeof(localeID)) {
251                udata_printError(ds, "icupkg/ures_enumDependencies(%s res=%08x) alias locale ID length %ld too long\n",
252                                itemName, res, stringLength);
253                *pErrorCode=U_BUFFER_OVERFLOW_ERROR;
254                return;
255            }
256
257            // convert the alias Unicode string to US-ASCII
258            ored16=0;
259            if(U_IS_BIG_ENDIAN==ds->inIsBigEndian) {
260                for(i=0; i<stringLength; ++i) {
261                    u16=p16[i];
262                    ored16|=u16;
263                    localeID[i]=(char)u16;
264                }
265            } else {
266                for(i=0; i<stringLength; ++i) {
267                    u16=p16[i];
268                    ored16|=u16;
269                    localeID[i]=(char)(u16>>8);
270                }
271                ored16=(uint16_t)((ored16<<8)|(ored16>>8));
272            }
273            localeID[stringLength]=0;
274            if(ored16>0x7f) {
275                udata_printError(ds, "icupkg/ures_enumDependencies(%s res=%08x) alias string contains non-ASCII characters\n",
276                                itemName, res);
277                *pErrorCode=U_INVALID_CHAR_FOUND;
278                return;
279            }
280
281#if (U_CHARSET_FAMILY==U_EBCDIC_FAMILY)
282            // swap to EBCDIC
283            // our swapper is probably not the right one, but
284            // the function uses it only for printing errors
285            uprv_ebcdicFromAscii(ds, localeID, stringLength, localeID, pErrorCode);
286            if(U_FAILURE(*pErrorCode)) {
287                return;
288            }
289#endif
290#if U_CHARSET_FAMILY!=U_ASCII_FAMILY && U_CHARSET_FAMILY!=U_EBCDIC_FAMILY
291#           error Unknown U_CHARSET_FAMILY value!
292#endif
293
294            checkIDSuffix(itemName, localeID, -1, ".res", check, context, pErrorCode);
295        }
296        break;
297    case URES_TABLE:
298    case URES_TABLE32:
299        {
300            const uint16_t *pKey16;
301            const int32_t *pKey32;
302
303            Resource item;
304            int32_t i, count;
305
306            if(RES_GET_TYPE(res)==URES_TABLE) {
307                /* get table item count */
308                pKey16=(const uint16_t *)p;
309                count=ds->readUInt16(*pKey16++);
310
311                pKey32=NULL;
312
313                /* top=((1+ table item count)/2 rounded up)+(table item count) */
314                offset+=((1+count)+1)/2;
315            } else {
316                /* get table item count */
317                pKey32=(const int32_t *)p;
318                count=udata_readInt32(ds, *pKey32++);
319
320                pKey16=NULL;
321
322                /* top=(1+ table item count)+(table item count) */
323                offset+=1+count;
324            }
325
326            p=inBundle+offset; /* pointer to table resources */
327            offset+=count;
328
329            if(offset>length) {
330                break; // the resource does not fit into the bundle, print error below
331            }
332
333            /* recurse */
334            for(i=0; i<count; ++i) {
335                item=ds->readUInt32(*p++);
336                ures_enumDependencies(
337                        ds, itemName, inBundle, length, item,
338                        ((const char *)inBundle)+
339                            (pKey16!=NULL ?
340                                ds->readUInt16(pKey16[i]) :
341                                udata_readInt32(ds, pKey32[i])),
342                        depth+1,
343                        check, context,
344                        pErrorCode);
345                if(U_FAILURE(*pErrorCode)) {
346                    udata_printError(ds, "icupkg/ures_enumDependencies(%s table res=%08x)[%d].recurse(%08x) failed\n",
347                                        itemName, res, i, item);
348                    break;
349                }
350            }
351        }
352        break;
353    case URES_ARRAY:
354        {
355            Resource item;
356            int32_t i, count;
357
358            /* top=offset+1+(array length) */
359            count=udata_readInt32(ds, (int32_t)*p++);
360            offset+=1+count;
361
362            if(offset>length) {
363                break; // the resource does not fit into the bundle, print error below
364            }
365
366            /* recurse */
367            for(i=0; i<count; ++i) {
368                item=ds->readUInt32(*p++);
369                ures_enumDependencies(
370                        ds, itemName, inBundle, length,
371                        item, NULL, depth+1,
372                        check, context,
373                        pErrorCode);
374                if(U_FAILURE(*pErrorCode)) {
375                    udata_printError(ds, "icupkg/ures_enumDependencies(%s array res=%08x)[%d].recurse(%08x) failed\n",
376                                        itemName, res, i, item);
377                    break;
378                }
379            }
380        }
381        break;
382    default:
383        break;
384    }
385
386    if(U_FAILURE(*pErrorCode)) {
387        /* nothing to do */
388    } else if(0<=length && length<offset) {
389        udata_printError(ds, "icupkg/ures_enumDependencies(%s res=%08x) resource limit exceeds bundle length %d\n",
390                         itemName, res, length);
391        *pErrorCode=U_INDEX_OUTOFBOUNDS_ERROR;
392    }
393}
394
395/* code adapted from ures_swap() */
396static void
397ures_enumDependencies(const UDataSwapper *ds,
398                      const char *itemName, const UDataInfo *pInfo,
399                      const uint8_t *inBytes, int32_t length,
400                      CheckDependency check, void *context,
401                      UErrorCode *pErrorCode) {
402    const Resource *inBundle;
403    Resource rootRes;
404
405    /* the following integers count Resource item offsets (4 bytes each), not bytes */
406    int32_t bundleLength;
407
408    /* check format version */
409    if(pInfo->formatVersion[0]!=1) {
410        fprintf(stderr, "icupkg: .res format version %02x not supported\n",
411                        pInfo->formatVersion[0]);
412        exit(U_UNSUPPORTED_ERROR);
413    }
414
415    /* a resource bundle must contain at least one resource item */
416    bundleLength=length/4;
417
418    /* formatVersion 1.1 must have a root item and at least 5 indexes */
419    if( bundleLength<
420            (pInfo->formatVersion[1]==0 ? 1 : 1+5)
421    ) {
422        fprintf(stderr, "icupkg: too few bytes (%d after header) for a resource bundle\n",
423                        length);
424        exit(U_INDEX_OUTOFBOUNDS_ERROR);
425    }
426
427    inBundle=(const Resource *)inBytes;
428    rootRes=ds->readUInt32(*inBundle);
429
430    ures_enumDependencies(
431        ds, itemName, inBundle, bundleLength,
432        rootRes, NULL, 0,
433        check, context,
434        pErrorCode);
435
436    /*
437     * if the bundle attributes are present and the nofallback flag is not set,
438     * then add the parent bundle as a dependency
439     */
440    if(pInfo->formatVersion[1]>=1) {
441        int32_t indexes[URES_INDEX_TOP];
442        const int32_t *inIndexes;
443
444        inIndexes=(const int32_t *)inBundle+1;
445        indexes[URES_INDEX_LENGTH]=udata_readInt32(ds, inIndexes[URES_INDEX_LENGTH]);
446        if(indexes[URES_INDEX_LENGTH]>URES_INDEX_ATTRIBUTES) {
447            indexes[URES_INDEX_ATTRIBUTES]=udata_readInt32(ds, inIndexes[URES_INDEX_ATTRIBUTES]);
448            if(0==(indexes[URES_INDEX_ATTRIBUTES]&URES_ATT_NO_FALLBACK)) {
449                /* this bundle participates in locale fallback */
450                checkParent(itemName, check, context, pErrorCode);
451            }
452        }
453    }
454}
455
456// get dependencies from conversion tables --------------------------------- ***
457
458/* code adapted from ucnv_swap() */
459static void
460ucnv_enumDependencies(const UDataSwapper *ds,
461                      const char *itemName, const UDataInfo *pInfo,
462                      const uint8_t *inBytes, int32_t length,
463                      CheckDependency check, void *context,
464                      UErrorCode *pErrorCode) {
465    uint32_t staticDataSize;
466
467    const UConverterStaticData *inStaticData;
468
469    const _MBCSHeader *inMBCSHeader;
470    uint8_t outputType;
471
472    /* check format version */
473    if(!(
474        pInfo->formatVersion[0]==6 &&
475        pInfo->formatVersion[1]>=2
476    )) {
477        fprintf(stderr, "icupkg/ucnv_enumDependencies(): .cnv format version %02x.%02x not supported\n",
478                        pInfo->formatVersion[0], pInfo->formatVersion[1]);
479        exit(U_UNSUPPORTED_ERROR);
480    }
481
482    /* read the initial UConverterStaticData structure after the UDataInfo header */
483    inStaticData=(const UConverterStaticData *)inBytes;
484
485    if( length<(int32_t)sizeof(UConverterStaticData) ||
486        (uint32_t)length<(staticDataSize=ds->readUInt32(inStaticData->structSize))
487    ) {
488        udata_printError(ds, "icupkg/ucnv_enumDependencies(): too few bytes (%d after header) for an ICU .cnv conversion table\n",
489                            length);
490        *pErrorCode=U_INDEX_OUTOFBOUNDS_ERROR;
491        return;
492    }
493
494    inBytes+=staticDataSize;
495    length-=(int32_t)staticDataSize;
496
497    /* check for supported conversionType values */
498    if(inStaticData->conversionType==UCNV_MBCS) {
499        /* MBCS data */
500        uint32_t mbcsHeaderFlags;
501        int32_t extOffset;
502
503        inMBCSHeader=(const _MBCSHeader *)inBytes;
504
505        if(length<(int32_t)sizeof(_MBCSHeader)) {
506            udata_printError(ds, "icupkg/ucnv_enumDependencies(): too few bytes (%d after headers) for an ICU MBCS .cnv conversion table\n",
507                                length);
508            *pErrorCode=U_INDEX_OUTOFBOUNDS_ERROR;
509            return;
510        }
511        if(!(inMBCSHeader->version[0]==4 && inMBCSHeader->version[1]>=1)) {
512            udata_printError(ds, "icupkg/ucnv_enumDependencies(): unsupported _MBCSHeader.version %d.%d\n",
513                             inMBCSHeader->version[0], inMBCSHeader->version[1]);
514            *pErrorCode=U_UNSUPPORTED_ERROR;
515            return;
516        }
517
518        mbcsHeaderFlags=ds->readUInt32(inMBCSHeader->flags);
519        extOffset=(int32_t)(mbcsHeaderFlags>>8);
520        outputType=(uint8_t)mbcsHeaderFlags;
521
522        if(outputType==MBCS_OUTPUT_EXT_ONLY) {
523            /*
524             * extension-only file,
525             * contains a base name instead of normal base table data
526             */
527            char baseName[32];
528            int32_t baseNameLength;
529
530            /* there is extension data after the base data, see ucnv_ext.h */
531            if(length<(extOffset+UCNV_EXT_INDEXES_MIN_LENGTH*4)) {
532                udata_printError(ds, "icupkg/ucnv_enumDependencies(): too few bytes (%d after headers) for an ICU MBCS .cnv conversion table with extension data\n",
533                                 length);
534                *pErrorCode=U_INDEX_OUTOFBOUNDS_ERROR;
535                return;
536            }
537
538            /* swap the base name, between the header and the extension data */
539            baseNameLength=(int32_t)strlen((const char *)(inMBCSHeader+1));
540            if(baseNameLength>=(int32_t)sizeof(baseName)) {
541                udata_printError(ds, "icupkg/ucnv_enumDependencies(%s): base name length %ld too long\n",
542                                 itemName, baseNameLength);
543                *pErrorCode=U_UNSUPPORTED_ERROR;
544                return;
545            }
546            ds->swapInvChars(ds, inMBCSHeader+1, baseNameLength+1, baseName, pErrorCode);
547
548            checkIDSuffix(itemName, baseName, -1, ".cnv", check, context, pErrorCode);
549        }
550    }
551}
552
553// ICU data formats -------------------------------------------------------- ***
554
/*
 * dataFormat signatures recognized by getDataFormat();
 * the array order must match the FMT_ constants below
 */
static const struct {
    uint8_t dataFormat[4];
} dataFormats[]={
    { { 0x52, 0x65, 0x73, 0x42 } },     /* FMT_RES:   dataFormat="ResB" */
    { { 0x63, 0x6e, 0x76, 0x74 } },     /* FMT_CNV:   dataFormat="cnvt" */
    { { 0x43, 0x76, 0x41, 0x6c } }      /* FMT_ALIAS: dataFormat="CvAl" */
};

/* indexes into dataFormats[]; FMT_COUNT is the number of entries */
enum {
    FMT_RES,
    FMT_CNV,
    FMT_ALIAS,
    FMT_COUNT
};

/*
 * Look up a 4-byte dataFormat signature.
 * Returns the matching FMT_ constant, or -1 if the signature is not in dataFormats[].
 */
static int32_t
getDataFormat(const uint8_t dataFormat[4]) {
    int32_t format=0;

    while(format<FMT_COUNT) {
        if(memcmp(dataFormat, dataFormats[format].dataFormat,
                  sizeof(dataFormats[format].dataFormat))==0) {
            break;
        }
        ++format;
    }
    return format<FMT_COUNT ? format : -1;
}
581
582// enumerate dependencies of a package item -------------------------------- ***
583
584U_NAMESPACE_BEGIN
585
586void
587Package::enumDependencies(Item *pItem, void *context, CheckDependency check) {
588    const UDataInfo *pInfo;
589    const uint8_t *inBytes;
590    int32_t format, length, infoLength, itemHeaderLength;
591    UErrorCode errorCode;
592
593    errorCode=U_ZERO_ERROR;
594    pInfo=getDataInfo(pItem->data,pItem->length, infoLength, itemHeaderLength, &errorCode);
595    if(U_FAILURE(errorCode)) {
596        return; // should not occur because readFile() checks headers
597    }
598
599    // find the data format and call the corresponding function, if any
600    format=getDataFormat(pInfo->dataFormat);
601    if(format>=0) {
602        UDataSwapper *ds;
603
604        // TODO: share/cache swappers
605        ds=udata_openSwapper((UBool)pInfo->isBigEndian, pInfo->charsetFamily, U_IS_BIG_ENDIAN, U_CHARSET_FAMILY, &errorCode);
606        if(U_FAILURE(errorCode)) {
607            fprintf(stderr, "icupkg: udata_openSwapper(\"%s\") failed - %s\n",
608                    pItem->name, u_errorName(errorCode));
609            exit(errorCode);
610        }
611
612        ds->printError=printError;
613        ds->printErrorContext=stderr;
614
615        inBytes=pItem->data+itemHeaderLength;
616        length=pItem->length-itemHeaderLength;
617
618        switch(format) {
619        case FMT_RES:
620            ures_enumDependencies(ds, pItem->name, pInfo, inBytes, length, check, context, &errorCode);
621            break;
622        case FMT_CNV:
623            ucnv_enumDependencies(ds, pItem->name, pInfo, inBytes, length, check, context, &errorCode);
624            break;
625        default:
626            break;
627        }
628
629        udata_closeSwapper(ds);
630
631        if(U_FAILURE(errorCode)) {
632            exit(errorCode);
633        }
634    }
635}
636U_NAMESPACE_END
637