pkgitems.cpp revision 51cfa1a9a96cad34675a6415fe86dfdf3f525bb6
1/* 2******************************************************************************* 3* 4* Copyright (C) 2003-2007, International Business Machines 5* Corporation and others. All Rights Reserved. 6* 7******************************************************************************* 8* file name: pkgitems.cpp 9* encoding: US-ASCII 10* tab size: 8 (not used) 11* indentation:4 12* 13* created on: 2005sep18 14* created by: Markus W. Scherer 15* 16* Companion file to package.cpp. Deals with details of ICU data item formats. 17* Used for item dependencies. 18* Contains adapted code from uresdata.c and ucnv_bld.c (swapper code from 2003). 19*/ 20 21#include "unicode/utypes.h" 22#include "unicode/ures.h" 23#include "unicode/putil.h" 24#include "unicode/udata.h" 25#include "cstring.h" 26#include "ucmndata.h" 27#include "udataswp.h" 28#include "swapimpl.h" 29#include "toolutil.h" 30#include "package.h" 31#include "pkg_imp.h" 32 33#include <stdio.h> 34#include <stdlib.h> 35#include <string.h> 36 37/* item formats in common */ 38 39#include "uresdata.h" 40#include "ucnv_bld.h" 41#include "ucnv_io.h" 42 43// general definitions ----------------------------------------------------- *** 44 45#define LENGTHOF(array) (int32_t)(sizeof(array)/sizeof((array)[0])) 46 47U_CDECL_BEGIN 48 49static void U_CALLCONV 50printError(void *context, const char *fmt, va_list args) { 51 vfprintf((FILE *)context, fmt, args); 52} 53 54U_CDECL_END 55 56// check a dependency ------------------------------------------------------ *** 57 58/* 59 * assemble the target item name from the source item name, an ID 60 * and a suffix 61 */ 62static void 63checkIDSuffix(const char *itemName, const char *id, int32_t idLength, const char *suffix, 64 CheckDependency check, void *context, 65 UErrorCode *pErrorCode) { 66 char target[200]; 67 const char *itemID; 68 int32_t treeLength, suffixLength, targetLength; 69 70 // get the item basename 71 itemID=strrchr(itemName, '/'); 72 if(itemID!=NULL) { 73 ++itemID; 74 } else { 75 itemID=itemName; 76 } 77 78 // build the target string 79 treeLength=(int32_t)(itemID-itemName); 80 if(idLength<0) { 81 idLength=(int32_t)strlen(id); 82 } 83 suffixLength=(int32_t)strlen(suffix); 84 targetLength=treeLength+idLength+suffixLength; 85 if(targetLength>=(int32_t)sizeof(target)) { 86 fprintf(stderr, "icupkg/checkIDSuffix(%s) alias target item name length %ld too long\n", 87 itemName, (long)targetLength); 88 *pErrorCode=U_BUFFER_OVERFLOW_ERROR; 89 return; 90 } 91 92 memcpy(target, itemName, treeLength); 93 memcpy(target+treeLength, id, idLength); 94 memcpy(target+treeLength+idLength, suffix, suffixLength+1); // +1 includes the terminating NUL 95 96 check(context, itemName, target); 97} 98 99/* assemble the target item name from the item's parent item name */ 100static void 101checkParent(const char *itemName, CheckDependency check, void *context, 102 UErrorCode *pErrorCode) { 103 const char *itemID, *parent, *parentLimit, *suffix; 104 int32_t parentLength; 105 106 // get the item basename 107 itemID=strrchr(itemName, '/'); 108 if(itemID!=NULL) { 109 ++itemID; 110 } else { 111 itemID=itemName; 112 } 113 114 // get the item suffix 115 suffix=strrchr(itemID, '.'); 116 if(suffix==NULL) { 117 // empty suffix, point to the end of the string 118 suffix=strrchr(itemID, 0); 119 } 120 121 // get the position of the last '_' 122 for(parentLimit=suffix; parentLimit>itemID && *--parentLimit!='_';) {} 123 124 if(parentLimit!=itemID) { 125 // get the parent item name by truncating the last part of this item's name */ 126 parent=itemID; 127 parentLength=(int32_t)(parentLimit-itemID); 128 } else { 129 // no '_' in the item name: the parent is the root bundle 130 parent="root"; 131 parentLength=4; 132 if((suffix-itemID)==parentLength && 0==memcmp(itemID, parent, parentLength)) { 133 // the item itself is "root", which does not depend on a parent 134 return; 135 } 136 } 137 checkIDSuffix(itemName, parent, parentLength, suffix, check, context, pErrorCode); 138} 139 140// get dependencies from resource bundles ---------------------------------- *** 141 142static const char *const gAliasKey="%%ALIAS"; 143enum { gAliasKeyLength=7 }; 144 145/* 146 * Enumerate one resource item and its children and extract dependencies from 147 * aliases. 148 * Code adapted from ures_preflightResource() and ures_swapResource(). 149 */ 150static void 151ures_enumDependencies(const UDataSwapper *ds, 152 const char *itemName, 153 const Resource *inBundle, int32_t length, 154 Resource res, const char *inKey, int32_t depth, 155 CheckDependency check, void *context, 156 UErrorCode *pErrorCode) { 157 const Resource *p; 158 int32_t offset; 159 160 if(res==0 || RES_GET_TYPE(res)==URES_INT) { 161 /* empty string or integer, nothing to do */ 162 return; 163 } 164 165 /* all other types use an offset to point to their data */ 166 offset=(int32_t)RES_GET_OFFSET(res); 167 if(0<=length && length<=offset) { 168 udata_printError(ds, "icupkg/ures_enumDependencies(%s res=%08x) resource offset exceeds bundle length %d\n", 169 itemName, res, length); 170 *pErrorCode=U_INDEX_OUTOFBOUNDS_ERROR; 171 return; 172 } 173 p=inBundle+offset; 174 175 switch(RES_GET_TYPE(res)) { 176 /* strings and aliases have physically the same value layout */ 177 case URES_STRING: 178 // we ignore all strings except top-level strings with a %%ALIAS key 179 if(depth!=1) { 180 break; 181 } else { 182 char key[8]; 183 int32_t keyLength; 184 185 keyLength=(int32_t)strlen(inKey); 186 if(keyLength!=gAliasKeyLength) { 187 break; 188 } 189 ds->swapInvChars(ds, inKey, gAliasKeyLength+1, key, pErrorCode); 190 if(U_FAILURE(*pErrorCode)) { 191 udata_printError(ds, "icupkg/ures_enumDependencies(%s res=%08x) string key contains variant characters\n", 192 itemName, res); 193 return; 194 } 195 if(0!=strcmp(key, gAliasKey)) { 196 break; 197 } 198 } 199 // for the top-level %%ALIAS string fall through to URES_ALIAS 200 case URES_ALIAS: 201 { 202 char localeID[32]; 203 const uint16_t *p16; 204 int32_t i, stringLength; 205 uint16_t u16, ored16; 206 207 stringLength=udata_readInt32(ds, (int32_t)*p); 208 209 /* top=offset+1+(string length +1)/2 rounded up */ 210 offset+=1+((stringLength+1)+1)/2; 211 if(offset>length) { 212 break; // the resource does not fit into the bundle, print error below 213 } 214 215 // extract the locale ID from alias strings like 216 // locale_ID/key1/key2/key3 217 // locale_ID 218 if(U_IS_BIG_ENDIAN==ds->inIsBigEndian) { 219 u16=0x2f; // slash in local endianness 220 } else { 221 u16=0x2f00; // slash in opposite endianness 222 } 223 p16=(const uint16_t *)(p+1); // Unicode string contents 224 225 // search for the first slash 226 for(i=0; i<stringLength && p16[i]!=u16; ++i) {} 227 228 if(RES_GET_TYPE(res)==URES_ALIAS) { 229 // ignore aliases with an initial slash: 230 // /ICUDATA/... and /pkgname/... go to a different package 231 // /LOCALE/... are for dynamic sideways fallbacks and don't go to a fixed bundle 232 if(i==0) { 233 break; // initial slash ('/') 234 } 235 236 // ignore the intra-bundle path starting from the first slash ('/') 237 stringLength=i; 238 } else /* URES_STRING */ { 239 // the whole string should only consist of a locale ID 240 if(i!=stringLength) { 241 udata_printError(ds, "icupkg/ures_enumDependencies(%s res=%08x) %%ALIAS contains a '/'\n", 242 itemName, res); 243 *pErrorCode=U_UNSUPPORTED_ERROR; 244 return; 245 } 246 } 247 248 // convert the Unicode string to char * and 249 // check that it has a bundle path but no package 250 if(stringLength>=(int32_t)sizeof(localeID)) { 251 udata_printError(ds, "icupkg/ures_enumDependencies(%s res=%08x) alias locale ID length %ld too long\n", 252 itemName, res, stringLength); 253 *pErrorCode=U_BUFFER_OVERFLOW_ERROR; 254 return; 255 } 256 257 // convert the alias Unicode string to US-ASCII 258 ored16=0; 259 if(U_IS_BIG_ENDIAN==ds->inIsBigEndian) { 260 for(i=0; i<stringLength; ++i) { 261 u16=p16[i]; 262 ored16|=u16; 263 localeID[i]=(char)u16; 264 } 265 } else { 266 for(i=0; i<stringLength; ++i) { 267 u16=p16[i]; 268 ored16|=u16; 269 localeID[i]=(char)(u16>>8); 270 } 271 ored16=(uint16_t)((ored16<<8)|(ored16>>8)); 272 } 273 localeID[stringLength]=0; 274 if(ored16>0x7f) { 275 udata_printError(ds, "icupkg/ures_enumDependencies(%s res=%08x) alias string contains non-ASCII characters\n", 276 itemName, res); 277 *pErrorCode=U_INVALID_CHAR_FOUND; 278 return; 279 } 280 281#if (U_CHARSET_FAMILY==U_EBCDIC_FAMILY) 282 // swap to EBCDIC 283 // our swapper is probably not the right one, but 284 // the function uses it only for printing errors 285 uprv_ebcdicFromAscii(ds, localeID, stringLength, localeID, pErrorCode); 286 if(U_FAILURE(*pErrorCode)) { 287 return; 288 } 289#endif 290#if U_CHARSET_FAMILY!=U_ASCII_FAMILY && U_CHARSET_FAMILY!=U_EBCDIC_FAMILY 291# error Unknown U_CHARSET_FAMILY value! 292#endif 293 294 checkIDSuffix(itemName, localeID, -1, ".res", check, context, pErrorCode); 295 } 296 break; 297 case URES_TABLE: 298 case URES_TABLE32: 299 { 300 const uint16_t *pKey16; 301 const int32_t *pKey32; 302 303 Resource item; 304 int32_t i, count; 305 306 if(RES_GET_TYPE(res)==URES_TABLE) { 307 /* get table item count */ 308 pKey16=(const uint16_t *)p; 309 count=ds->readUInt16(*pKey16++); 310 311 pKey32=NULL; 312 313 /* top=((1+ table item count)/2 rounded up)+(table item count) */ 314 offset+=((1+count)+1)/2; 315 } else { 316 /* get table item count */ 317 pKey32=(const int32_t *)p; 318 count=udata_readInt32(ds, *pKey32++); 319 320 pKey16=NULL; 321 322 /* top=(1+ table item count)+(table item count) */ 323 offset+=1+count; 324 } 325 326 p=inBundle+offset; /* pointer to table resources */ 327 offset+=count; 328 329 if(offset>length) { 330 break; // the resource does not fit into the bundle, print error below 331 } 332 333 /* recurse */ 334 for(i=0; i<count; ++i) { 335 item=ds->readUInt32(*p++); 336 ures_enumDependencies( 337 ds, itemName, inBundle, length, item, 338 ((const char *)inBundle)+ 339 (pKey16!=NULL ? 340 ds->readUInt16(pKey16[i]) : 341 udata_readInt32(ds, pKey32[i])), 342 depth+1, 343 check, context, 344 pErrorCode); 345 if(U_FAILURE(*pErrorCode)) { 346 udata_printError(ds, "icupkg/ures_enumDependencies(%s table res=%08x)[%d].recurse(%08x) failed\n", 347 itemName, res, i, item); 348 break; 349 } 350 } 351 } 352 break; 353 case URES_ARRAY: 354 { 355 Resource item; 356 int32_t i, count; 357 358 /* top=offset+1+(array length) */ 359 count=udata_readInt32(ds, (int32_t)*p++); 360 offset+=1+count; 361 362 if(offset>length) { 363 break; // the resource does not fit into the bundle, print error below 364 } 365 366 /* recurse */ 367 for(i=0; i<count; ++i) { 368 item=ds->readUInt32(*p++); 369 ures_enumDependencies( 370 ds, itemName, inBundle, length, 371 item, NULL, depth+1, 372 check, context, 373 pErrorCode); 374 if(U_FAILURE(*pErrorCode)) { 375 udata_printError(ds, "icupkg/ures_enumDependencies(%s array res=%08x)[%d].recurse(%08x) failed\n", 376 itemName, res, i, item); 377 break; 378 } 379 } 380 } 381 break; 382 default: 383 break; 384 } 385 386 if(U_FAILURE(*pErrorCode)) { 387 /* nothing to do */ 388 } else if(0<=length && length<offset) { 389 udata_printError(ds, "icupkg/ures_enumDependencies(%s res=%08x) resource limit exceeds bundle length %d\n", 390 itemName, res, length); 391 *pErrorCode=U_INDEX_OUTOFBOUNDS_ERROR; 392 } 393} 394 395/* code adapted from ures_swap() */ 396static void 397ures_enumDependencies(const UDataSwapper *ds, 398 const char *itemName, const UDataInfo *pInfo, 399 const uint8_t *inBytes, int32_t length, 400 CheckDependency check, void *context, 401 UErrorCode *pErrorCode) { 402 const Resource *inBundle; 403 Resource rootRes; 404 405 /* the following integers count Resource item offsets (4 bytes each), not bytes */ 406 int32_t bundleLength; 407 408 /* check format version */ 409 if(pInfo->formatVersion[0]!=1) { 410 fprintf(stderr, "icupkg: .res format version %02x not supported\n", 411 pInfo->formatVersion[0]); 412 exit(U_UNSUPPORTED_ERROR); 413 } 414 415 /* a resource bundle must contain at least one resource item */ 416 bundleLength=length/4; 417 418 /* formatVersion 1.1 must have a root item and at least 5 indexes */ 419 if( bundleLength< 420 (pInfo->formatVersion[1]==0 ? 1 : 1+5) 421 ) { 422 fprintf(stderr, "icupkg: too few bytes (%d after header) for a resource bundle\n", 423 length); 424 exit(U_INDEX_OUTOFBOUNDS_ERROR); 425 } 426 427 inBundle=(const Resource *)inBytes; 428 rootRes=ds->readUInt32(*inBundle); 429 430 ures_enumDependencies( 431 ds, itemName, inBundle, bundleLength, 432 rootRes, NULL, 0, 433 check, context, 434 pErrorCode); 435 436 /* 437 * if the bundle attributes are present and the nofallback flag is not set, 438 * then add the parent bundle as a dependency 439 */ 440 if(pInfo->formatVersion[1]>=1) { 441 int32_t indexes[URES_INDEX_TOP]; 442 const int32_t *inIndexes; 443 444 inIndexes=(const int32_t *)inBundle+1; 445 indexes[URES_INDEX_LENGTH]=udata_readInt32(ds, inIndexes[URES_INDEX_LENGTH]); 446 if(indexes[URES_INDEX_LENGTH]>URES_INDEX_ATTRIBUTES) { 447 indexes[URES_INDEX_ATTRIBUTES]=udata_readInt32(ds, inIndexes[URES_INDEX_ATTRIBUTES]); 448 if(0==(indexes[URES_INDEX_ATTRIBUTES]&URES_ATT_NO_FALLBACK)) { 449 /* this bundle participates in locale fallback */ 450 checkParent(itemName, check, context, pErrorCode); 451 } 452 } 453 } 454} 455 456// get dependencies from conversion tables --------------------------------- *** 457 458/* code adapted from ucnv_swap() */ 459static void 460ucnv_enumDependencies(const UDataSwapper *ds, 461 const char *itemName, const UDataInfo *pInfo, 462 const uint8_t *inBytes, int32_t length, 463 CheckDependency check, void *context, 464 UErrorCode *pErrorCode) { 465 uint32_t staticDataSize; 466 467 const UConverterStaticData *inStaticData; 468 469 const _MBCSHeader *inMBCSHeader; 470 uint8_t outputType; 471 472 /* check format version */ 473 if(!( 474 pInfo->formatVersion[0]==6 && 475 pInfo->formatVersion[1]>=2 476 )) { 477 fprintf(stderr, "icupkg/ucnv_enumDependencies(): .cnv format version %02x.%02x not supported\n", 478 pInfo->formatVersion[0], pInfo->formatVersion[1]); 479 exit(U_UNSUPPORTED_ERROR); 480 } 481 482 /* read the initial UConverterStaticData structure after the UDataInfo header */ 483 inStaticData=(const UConverterStaticData *)inBytes; 484 485 if( length<(int32_t)sizeof(UConverterStaticData) || 486 (uint32_t)length<(staticDataSize=ds->readUInt32(inStaticData->structSize)) 487 ) { 488 udata_printError(ds, "icupkg/ucnv_enumDependencies(): too few bytes (%d after header) for an ICU .cnv conversion table\n", 489 length); 490 *pErrorCode=U_INDEX_OUTOFBOUNDS_ERROR; 491 return; 492 } 493 494 inBytes+=staticDataSize; 495 length-=(int32_t)staticDataSize; 496 497 /* check for supported conversionType values */ 498 if(inStaticData->conversionType==UCNV_MBCS) { 499 /* MBCS data */ 500 uint32_t mbcsHeaderFlags; 501 int32_t extOffset; 502 503 inMBCSHeader=(const _MBCSHeader *)inBytes; 504 505 if(length<(int32_t)sizeof(_MBCSHeader)) { 506 udata_printError(ds, "icupkg/ucnv_enumDependencies(): too few bytes (%d after headers) for an ICU MBCS .cnv conversion table\n", 507 length); 508 *pErrorCode=U_INDEX_OUTOFBOUNDS_ERROR; 509 return; 510 } 511 if(!(inMBCSHeader->version[0]==4 && inMBCSHeader->version[1]>=1)) { 512 udata_printError(ds, "icupkg/ucnv_enumDependencies(): unsupported _MBCSHeader.version %d.%d\n", 513 inMBCSHeader->version[0], inMBCSHeader->version[1]); 514 *pErrorCode=U_UNSUPPORTED_ERROR; 515 return; 516 } 517 518 mbcsHeaderFlags=ds->readUInt32(inMBCSHeader->flags); 519 extOffset=(int32_t)(mbcsHeaderFlags>>8); 520 outputType=(uint8_t)mbcsHeaderFlags; 521 522 if(outputType==MBCS_OUTPUT_EXT_ONLY) { 523 /* 524 * extension-only file, 525 * contains a base name instead of normal base table data 526 */ 527 char baseName[32]; 528 int32_t baseNameLength; 529 530 /* there is extension data after the base data, see ucnv_ext.h */ 531 if(length<(extOffset+UCNV_EXT_INDEXES_MIN_LENGTH*4)) { 532 udata_printError(ds, "icupkg/ucnv_enumDependencies(): too few bytes (%d after headers) for an ICU MBCS .cnv conversion table with extension data\n", 533 length); 534 *pErrorCode=U_INDEX_OUTOFBOUNDS_ERROR; 535 return; 536 } 537 538 /* swap the base name, between the header and the extension data */ 539 baseNameLength=(int32_t)strlen((const char *)(inMBCSHeader+1)); 540 if(baseNameLength>=(int32_t)sizeof(baseName)) { 541 udata_printError(ds, "icupkg/ucnv_enumDependencies(%s): base name length %ld too long\n", 542 itemName, baseNameLength); 543 *pErrorCode=U_UNSUPPORTED_ERROR; 544 return; 545 } 546 ds->swapInvChars(ds, inMBCSHeader+1, baseNameLength+1, baseName, pErrorCode); 547 548 checkIDSuffix(itemName, baseName, -1, ".cnv", check, context, pErrorCode); 549 } 550 } 551} 552 553// ICU data formats -------------------------------------------------------- *** 554 555static const struct { 556 uint8_t dataFormat[4]; 557} dataFormats[]={ 558 { { 0x52, 0x65, 0x73, 0x42 } }, /* dataFormat="ResB" */ 559 { { 0x63, 0x6e, 0x76, 0x74 } }, /* dataFormat="cnvt" */ 560 { { 0x43, 0x76, 0x41, 0x6c } } /* dataFormat="CvAl" */ 561}; 562 563enum { 564 FMT_RES, 565 FMT_CNV, 566 FMT_ALIAS, 567 FMT_COUNT 568}; 569 570static int32_t 571getDataFormat(const uint8_t dataFormat[4]) { 572 int32_t i; 573 574 for(i=0; i<FMT_COUNT; ++i) { 575 if(0==memcmp(dataFormats[i].dataFormat, dataFormat, 4)) { 576 return i; 577 } 578 } 579 return -1; 580} 581 582// enumerate dependencies of a package item -------------------------------- *** 583 584U_NAMESPACE_BEGIN 585 586void 587Package::enumDependencies(Item *pItem, void *context, CheckDependency check) { 588 const UDataInfo *pInfo; 589 const uint8_t *inBytes; 590 int32_t format, length, infoLength, itemHeaderLength; 591 UErrorCode errorCode; 592 593 errorCode=U_ZERO_ERROR; 594 pInfo=getDataInfo(pItem->data,pItem->length, infoLength, itemHeaderLength, &errorCode); 595 if(U_FAILURE(errorCode)) { 596 return; // should not occur because readFile() checks headers 597 } 598 599 // find the data format and call the corresponding function, if any 600 format=getDataFormat(pInfo->dataFormat); 601 if(format>=0) { 602 UDataSwapper *ds; 603 604 // TODO: share/cache swappers 605 ds=udata_openSwapper((UBool)pInfo->isBigEndian, pInfo->charsetFamily, U_IS_BIG_ENDIAN, U_CHARSET_FAMILY, &errorCode); 606 if(U_FAILURE(errorCode)) { 607 fprintf(stderr, "icupkg: udata_openSwapper(\"%s\") failed - %s\n", 608 pItem->name, u_errorName(errorCode)); 609 exit(errorCode); 610 } 611 612 ds->printError=printError; 613 ds->printErrorContext=stderr; 614 615 inBytes=pItem->data+itemHeaderLength; 616 length=pItem->length-itemHeaderLength; 617 618 switch(format) { 619 case FMT_RES: 620 ures_enumDependencies(ds, pItem->name, pInfo, inBytes, length, check, context, &errorCode); 621 break; 622 case FMT_CNV: 623 ucnv_enumDependencies(ds, pItem->name, pInfo, inBytes, length, check, context, &errorCode); 624 break; 625 default: 626 break; 627 } 628 629 udata_closeSwapper(ds); 630 631 if(U_FAILURE(errorCode)) { 632 exit(errorCode); 633 } 634 } 635} 636U_NAMESPACE_END 637