1/* 2******************************************************************************* 3* 4* Copyright (C) 1999-2013, International Business Machines 5* Corporation and others. All Rights Reserved. 6* 7******************************************************************************* 8* file name: package.cpp 9* encoding: US-ASCII 10* tab size: 8 (not used) 11* indentation:4 12* 13* created on: 2005aug25 14* created by: Markus W. Scherer 15* 16* Read, modify, and write ICU .dat data package files. 17* This is an integral part of the icupkg tool, moved to the toolutil library 18* because parts of tool implementations tend to be later shared by 19* other tools. 20* Subsumes functionality and implementation code from 21* gencmn, decmn, and icuswap tools. 22*/ 23 24#include "unicode/utypes.h" 25#include "unicode/putil.h" 26#include "unicode/udata.h" 27#include "cstring.h" 28#include "uarrsort.h" 29#include "ucmndata.h" 30#include "udataswp.h" 31#include "swapimpl.h" 32#include "toolutil.h" 33#include "package.h" 34#include "cmemory.h" 35 36#include <stdio.h> 37#include <stdlib.h> 38#include <string.h> 39 40 41static const int32_t kItemsChunk = 256; /* How much to increase the filesarray by each time */ 42 43// general definitions ----------------------------------------------------- *** 44 45#define LENGTHOF(array) (int32_t)(sizeof(array)/sizeof((array)[0])) 46 47/* UDataInfo cf. udata.h */ 48static const UDataInfo dataInfo={ 49 (uint16_t)sizeof(UDataInfo), 50 0, 51 52 U_IS_BIG_ENDIAN, 53 U_CHARSET_FAMILY, 54 (uint8_t)sizeof(UChar), 55 0, 56 57 {0x43, 0x6d, 0x6e, 0x44}, /* dataFormat="CmnD" */ 58 {1, 0, 0, 0}, /* formatVersion */ 59 {3, 0, 0, 0} /* dataVersion */ 60}; 61 62U_CDECL_BEGIN 63static void U_CALLCONV 64printPackageError(void *context, const char *fmt, va_list args) { 65 vfprintf((FILE *)context, fmt, args); 66} 67U_CDECL_END 68 69static uint16_t 70readSwapUInt16(uint16_t x) { 71 return (uint16_t)((x<<8)|(x>>8)); 72} 73 74// platform types ---------------------------------------------------------- *** 75 76static const char *types="lb?e"; 77 78enum { TYPE_L, TYPE_B, TYPE_LE, TYPE_E, TYPE_COUNT }; 79 80static inline int32_t 81makeTypeEnum(uint8_t charset, UBool isBigEndian) { 82 return 2*(int32_t)charset+isBigEndian; 83} 84 85static inline int32_t 86makeTypeEnum(char type) { 87 return 88 type == 'l' ? TYPE_L : 89 type == 'b' ? TYPE_B : 90 type == 'e' ? TYPE_E : 91 -1; 92} 93 94static inline char 95makeTypeLetter(uint8_t charset, UBool isBigEndian) { 96 return types[makeTypeEnum(charset, isBigEndian)]; 97} 98 99static inline char 100makeTypeLetter(int32_t typeEnum) { 101 return types[typeEnum]; 102} 103 104static void 105makeTypeProps(char type, uint8_t &charset, UBool &isBigEndian) { 106 int32_t typeEnum=makeTypeEnum(type); 107 charset=(uint8_t)(typeEnum>>1); 108 isBigEndian=(UBool)(typeEnum&1); 109} 110 111U_CFUNC const UDataInfo * 112getDataInfo(const uint8_t *data, int32_t length, 113 int32_t &infoLength, int32_t &headerLength, 114 UErrorCode *pErrorCode) { 115 const DataHeader *pHeader; 116 const UDataInfo *pInfo; 117 118 if(pErrorCode==NULL || U_FAILURE(*pErrorCode)) { 119 return NULL; 120 } 121 if( data==NULL || 122 (length>=0 && length<(int32_t)sizeof(DataHeader)) 123 ) { 124 *pErrorCode=U_ILLEGAL_ARGUMENT_ERROR; 125 return NULL; 126 } 127 128 pHeader=(const DataHeader *)data; 129 pInfo=&pHeader->info; 130 if( (length>=0 && length<(int32_t)sizeof(DataHeader)) || 131 pHeader->dataHeader.magic1!=0xda || 132 pHeader->dataHeader.magic2!=0x27 || 133 pInfo->sizeofUChar!=2 134 ) { 135 *pErrorCode=U_UNSUPPORTED_ERROR; 136 return NULL; 137 } 138 139 if(pInfo->isBigEndian==U_IS_BIG_ENDIAN) { 140 headerLength=pHeader->dataHeader.headerSize; 141 infoLength=pInfo->size; 142 } else { 143 headerLength=readSwapUInt16(pHeader->dataHeader.headerSize); 144 infoLength=readSwapUInt16(pInfo->size); 145 } 146 147 if( headerLength<(int32_t)sizeof(DataHeader) || 148 infoLength<(int32_t)sizeof(UDataInfo) || 149 headerLength<(int32_t)(sizeof(pHeader->dataHeader)+infoLength) || 150 (length>=0 && length<headerLength) 151 ) { 152 *pErrorCode=U_UNSUPPORTED_ERROR; 153 return NULL; 154 } 155 156 return pInfo; 157} 158 159static int32_t 160getTypeEnumForInputData(const uint8_t *data, int32_t length, 161 UErrorCode *pErrorCode) { 162 const UDataInfo *pInfo; 163 int32_t infoLength, headerLength; 164 165 /* getDataInfo() checks for illegal arguments */ 166 pInfo=getDataInfo(data, length, infoLength, headerLength, pErrorCode); 167 if(pInfo==NULL) { 168 return -1; 169 } 170 171 return makeTypeEnum(pInfo->charsetFamily, (UBool)pInfo->isBigEndian); 172} 173 174// file handling ----------------------------------------------------------- *** 175 176static void 177extractPackageName(const char *filename, 178 char pkg[], int32_t capacity) { 179 const char *basename; 180 int32_t len; 181 182 basename=findBasename(filename); 183 len=(int32_t)strlen(basename)-4; /* -4: subtract the length of ".dat" */ 184 185 if(len<=0 || 0!=strcmp(basename+len, ".dat")) { 186 fprintf(stderr, "icupkg: \"%s\" is not recognized as a package filename (must end with .dat)\n", 187 basename); 188 exit(U_ILLEGAL_ARGUMENT_ERROR); 189 } 190 191 if(len>=capacity) { 192 fprintf(stderr, "icupkg: the package name \"%s\" is too long (>=%ld)\n", 193 basename, (long)capacity); 194 exit(U_ILLEGAL_ARGUMENT_ERROR); 195 } 196 197 memcpy(pkg, basename, len); 198 pkg[len]=0; 199} 200 201static int32_t 202getFileLength(FILE *f) { 203 int32_t length; 204 205 fseek(f, 0, SEEK_END); 206 length=(int32_t)ftell(f); 207 fseek(f, 0, SEEK_SET); 208 return length; 209} 210 211/* 212 * Turn tree separators and alternate file separators into normal file separators. 213 */ 214#if U_TREE_ENTRY_SEP_CHAR==U_FILE_SEP_CHAR && U_FILE_ALT_SEP_CHAR==U_FILE_SEP_CHAR 215#define treeToPath(s) 216#else 217static void 218treeToPath(char *s) { 219 char *t; 220 221 for(t=s; *t!=0; ++t) { 222 if(*t==U_TREE_ENTRY_SEP_CHAR || *t==U_FILE_ALT_SEP_CHAR) { 223 *t=U_FILE_SEP_CHAR; 224 } 225 } 226} 227#endif 228 229/* 230 * Turn file separators into tree separators. 231 */ 232#if U_TREE_ENTRY_SEP_CHAR==U_FILE_SEP_CHAR && U_FILE_ALT_SEP_CHAR==U_FILE_SEP_CHAR 233#define pathToTree(s) 234#else 235static void 236pathToTree(char *s) { 237 char *t; 238 239 for(t=s; *t!=0; ++t) { 240 if(*t==U_FILE_SEP_CHAR || *t==U_FILE_ALT_SEP_CHAR) { 241 *t=U_TREE_ENTRY_SEP_CHAR; 242 } 243 } 244} 245#endif 246 247/* 248 * Prepend the path (if any) to the name and run the name through treeToName(). 249 */ 250static void 251makeFullFilename(const char *path, const char *name, 252 char *filename, int32_t capacity) { 253 char *s; 254 255 // prepend the path unless NULL or empty 256 if(path!=NULL && path[0]!=0) { 257 if((int32_t)(strlen(path)+1)>=capacity) { 258 fprintf(stderr, "pathname too long: \"%s\"\n", path); 259 exit(U_BUFFER_OVERFLOW_ERROR); 260 } 261 strcpy(filename, path); 262 263 // make sure the path ends with a file separator 264 s=strchr(filename, 0); 265 if(*(s-1)!=U_FILE_SEP_CHAR && *(s-1)!=U_FILE_ALT_SEP_CHAR) { 266 *s++=U_FILE_SEP_CHAR; 267 } 268 } else { 269 s=filename; 270 } 271 272 // turn the name into a filename, turn tree separators into file separators 273 if((int32_t)((s-filename)+strlen(name))>=capacity) { 274 fprintf(stderr, "path/filename too long: \"%s%s\"\n", filename, name); 275 exit(U_BUFFER_OVERFLOW_ERROR); 276 } 277 strcpy(s, name); 278 treeToPath(s); 279} 280 281static void 282makeFullFilenameAndDirs(const char *path, const char *name, 283 char *filename, int32_t capacity) { 284 char *sep; 285 UErrorCode errorCode; 286 287 makeFullFilename(path, name, filename, capacity); 288 289 // make tree directories 290 errorCode=U_ZERO_ERROR; 291 sep=strchr(filename, 0)-strlen(name); 292 while((sep=strchr(sep, U_FILE_SEP_CHAR))!=NULL) { 293 if(sep!=filename) { 294 *sep=0; // truncate temporarily 295 uprv_mkdir(filename, &errorCode); 296 if(U_FAILURE(errorCode)) { 297 fprintf(stderr, "icupkg: unable to create tree directory \"%s\"\n", filename); 298 exit(U_FILE_ACCESS_ERROR); 299 } 300 } 301 *sep++=U_FILE_SEP_CHAR; // restore file separator character 302 } 303} 304 305static uint8_t * 306readFile(const char *path, const char *name, int32_t &length, char &type) { 307 char filename[1024]; 308 FILE *file; 309 uint8_t *data; 310 UErrorCode errorCode; 311 int32_t fileLength, typeEnum; 312 313 makeFullFilename(path, name, filename, (int32_t)sizeof(filename)); 314 315 /* open the input file, get its length, allocate memory for it, read the file */ 316 file=fopen(filename, "rb"); 317 if(file==NULL) { 318 fprintf(stderr, "icupkg: unable to open input file \"%s\"\n", filename); 319 exit(U_FILE_ACCESS_ERROR); 320 } 321 322 /* get the file length */ 323 fileLength=getFileLength(file); 324 if(ferror(file) || fileLength<=0) { 325 fprintf(stderr, "icupkg: empty input file \"%s\"\n", filename); 326 fclose(file); 327 exit(U_FILE_ACCESS_ERROR); 328 } 329 330 /* allocate the buffer, pad to multiple of 16 */ 331 length=(fileLength+0xf)&~0xf; 332 data=(uint8_t *)uprv_malloc(length); 333 if(data==NULL) { 334 fclose(file); 335 fprintf(stderr, "icupkg: malloc error allocating %d bytes.\n", (int)length); 336 exit(U_MEMORY_ALLOCATION_ERROR); 337 } 338 339 /* read the file */ 340 if(fileLength!=(int32_t)fread(data, 1, fileLength, file)) { 341 fprintf(stderr, "icupkg: error reading \"%s\"\n", filename); 342 fclose(file); 343 free(data); 344 exit(U_FILE_ACCESS_ERROR); 345 } 346 347 /* pad the file to a multiple of 16 using the usual padding byte */ 348 if(fileLength<length) { 349 memset(data+fileLength, 0xaa, length-fileLength); 350 } 351 352 fclose(file); 353 354 // minimum check for ICU-format data 355 errorCode=U_ZERO_ERROR; 356 typeEnum=getTypeEnumForInputData(data, length, &errorCode); 357 if(typeEnum<0 || U_FAILURE(errorCode)) { 358 fprintf(stderr, "icupkg: not an ICU data file: \"%s\"\n", filename); 359 free(data); 360#if !UCONFIG_NO_LEGACY_CONVERSION 361 exit(U_INVALID_FORMAT_ERROR); 362#else 363 fprintf(stderr, "U_INVALID_FORMAT_ERROR occurred but UCONFIG_NO_LEGACY_CONVERSION is on so this is expected.\n"); 364 exit(0); 365#endif 366 } 367 type=makeTypeLetter(typeEnum); 368 369 return data; 370} 371 372// .dat package file representation ---------------------------------------- *** 373 374U_CDECL_BEGIN 375 376static int32_t U_CALLCONV 377compareItems(const void * /*context*/, const void *left, const void *right) { 378 U_NAMESPACE_USE 379 380 return (int32_t)strcmp(((Item *)left)->name, ((Item *)right)->name); 381} 382 383U_CDECL_END 384 385U_NAMESPACE_BEGIN 386 387Package::Package() 388 : doAutoPrefix(FALSE), prefixEndsWithType(FALSE) { 389 inPkgName[0]=0; 390 pkgPrefix[0]=0; 391 inData=NULL; 392 inLength=0; 393 inCharset=U_CHARSET_FAMILY; 394 inIsBigEndian=U_IS_BIG_ENDIAN; 395 396 itemCount=0; 397 itemMax=0; 398 items=NULL; 399 400 inStringTop=outStringTop=0; 401 402 matchMode=0; 403 findPrefix=findSuffix=NULL; 404 findPrefixLength=findSuffixLength=0; 405 findNextIndex=-1; 406 407 // create a header for an empty package 408 DataHeader *pHeader; 409 pHeader=(DataHeader *)header; 410 pHeader->dataHeader.magic1=0xda; 411 pHeader->dataHeader.magic2=0x27; 412 memcpy(&pHeader->info, &dataInfo, sizeof(dataInfo)); 413 headerLength=(int32_t)(4+sizeof(dataInfo)); 414 if(headerLength&0xf) { 415 /* NUL-pad the header to a multiple of 16 */ 416 int32_t length=(headerLength+0xf)&~0xf; 417 memset(header+headerLength, 0, length-headerLength); 418 headerLength=length; 419 } 420 pHeader->dataHeader.headerSize=(uint16_t)headerLength; 421} 422 423Package::~Package() { 424 int32_t idx; 425 426 free(inData); 427 428 for(idx=0; idx<itemCount; ++idx) { 429 if(items[idx].isDataOwned) { 430 free(items[idx].data); 431 } 432 } 433 434 uprv_free((void*)items); 435} 436 437void 438Package::setPrefix(const char *p) { 439 if(strlen(p)>=sizeof(pkgPrefix)) { 440 fprintf(stderr, "icupkg: --toc_prefix %s too long\n", p); 441 exit(U_ILLEGAL_ARGUMENT_ERROR); 442 } 443 strcpy(pkgPrefix, p); 444} 445 446void 447Package::readPackage(const char *filename) { 448 UDataSwapper *ds; 449 const UDataInfo *pInfo; 450 UErrorCode errorCode; 451 452 const uint8_t *inBytes; 453 454 int32_t length, offset, i; 455 int32_t itemLength, typeEnum; 456 char type; 457 458 const UDataOffsetTOCEntry *inEntries; 459 460 extractPackageName(filename, inPkgName, (int32_t)sizeof(inPkgName)); 461 462 /* read the file */ 463 inData=readFile(NULL, filename, inLength, type); 464 length=inLength; 465 466 /* 467 * swap the header - even if the swapping itself is a no-op 468 * because it tells us the header length 469 */ 470 errorCode=U_ZERO_ERROR; 471 makeTypeProps(type, inCharset, inIsBigEndian); 472 ds=udata_openSwapper(inIsBigEndian, inCharset, U_IS_BIG_ENDIAN, U_CHARSET_FAMILY, &errorCode); 473 if(U_FAILURE(errorCode)) { 474 fprintf(stderr, "icupkg: udata_openSwapper(\"%s\") failed - %s\n", 475 filename, u_errorName(errorCode)); 476 exit(errorCode); 477 } 478 479 ds->printError=printPackageError; 480 ds->printErrorContext=stderr; 481 482 headerLength=sizeof(header); 483 if(length<headerLength) { 484 headerLength=length; 485 } 486 headerLength=udata_swapDataHeader(ds, inData, headerLength, header, &errorCode); 487 if(U_FAILURE(errorCode)) { 488 exit(errorCode); 489 } 490 491 /* check data format and format version */ 492 pInfo=(const UDataInfo *)((const char *)inData+4); 493 if(!( 494 pInfo->dataFormat[0]==0x43 && /* dataFormat="CmnD" */ 495 pInfo->dataFormat[1]==0x6d && 496 pInfo->dataFormat[2]==0x6e && 497 pInfo->dataFormat[3]==0x44 && 498 pInfo->formatVersion[0]==1 499 )) { 500 fprintf(stderr, "icupkg: data format %02x.%02x.%02x.%02x (format version %02x) is not recognized as an ICU .dat package\n", 501 pInfo->dataFormat[0], pInfo->dataFormat[1], 502 pInfo->dataFormat[2], pInfo->dataFormat[3], 503 pInfo->formatVersion[0]); 504 exit(U_UNSUPPORTED_ERROR); 505 } 506 inIsBigEndian=(UBool)pInfo->isBigEndian; 507 inCharset=pInfo->charsetFamily; 508 509 inBytes=(const uint8_t *)inData+headerLength; 510 inEntries=(const UDataOffsetTOCEntry *)(inBytes+4); 511 512 /* check that the itemCount fits, then the ToC table, then at least the header of the last item */ 513 length-=headerLength; 514 if(length<4) { 515 /* itemCount does not fit */ 516 offset=0x7fffffff; 517 } else { 518 itemCount=udata_readInt32(ds, *(const int32_t *)inBytes); 519 setItemCapacity(itemCount); /* resize so there's space */ 520 if(itemCount==0) { 521 offset=4; 522 } else if(length<(4+8*itemCount)) { 523 /* ToC table does not fit */ 524 offset=0x7fffffff; 525 } else { 526 /* offset of the last item plus at least 20 bytes for its header */ 527 offset=20+(int32_t)ds->readUInt32(inEntries[itemCount-1].dataOffset); 528 } 529 } 530 if(length<offset) { 531 fprintf(stderr, "icupkg: too few bytes (%ld after header) for a .dat package\n", 532 (long)length); 533 exit(U_INDEX_OUTOFBOUNDS_ERROR); 534 } 535 /* do not modify the package length variable until the last item's length is set */ 536 537 if(itemCount<=0) { 538 if(doAutoPrefix) { 539 fprintf(stderr, "icupkg: --auto_toc_prefix[_with_type] but the input package is empty\n"); 540 exit(U_INVALID_FORMAT_ERROR); 541 } 542 } else { 543 char prefix[MAX_PKG_NAME_LENGTH+4]; 544 char *s, *inItemStrings; 545 546 if(itemCount>itemMax) { 547 fprintf(stderr, "icupkg: too many items, maximum is %d\n", itemMax); 548 exit(U_BUFFER_OVERFLOW_ERROR); 549 } 550 551 /* swap the item name strings */ 552 int32_t stringsOffset=4+8*itemCount; 553 itemLength=(int32_t)(ds->readUInt32(inEntries[0].dataOffset))-stringsOffset; 554 555 // don't include padding bytes at the end of the item names 556 while(itemLength>0 && inBytes[stringsOffset+itemLength-1]!=0) { 557 --itemLength; 558 } 559 560 if((inStringTop+itemLength)>STRING_STORE_SIZE) { 561 fprintf(stderr, "icupkg: total length of item name strings too long\n"); 562 exit(U_BUFFER_OVERFLOW_ERROR); 563 } 564 565 inItemStrings=inStrings+inStringTop; 566 ds->swapInvChars(ds, inBytes+stringsOffset, itemLength, inItemStrings, &errorCode); 567 if(U_FAILURE(errorCode)) { 568 fprintf(stderr, "icupkg failed to swap the input .dat package item name strings\n"); 569 exit(U_INVALID_FORMAT_ERROR); 570 } 571 inStringTop+=itemLength; 572 573 // reset the Item entries 574 memset(items, 0, itemCount*sizeof(Item)); 575 576 /* 577 * Get the common prefix of the items. 578 * New-style ICU .dat packages use tree separators ('/') between package names, 579 * tree names, and item names, 580 * while old-style ICU .dat packages (before multi-tree support) 581 * use an underscore ('_') between package and item names. 582 */ 583 offset=(int32_t)ds->readUInt32(inEntries[0].nameOffset)-stringsOffset; 584 s=inItemStrings+offset; // name of the first entry 585 int32_t prefixLength; 586 if(doAutoPrefix) { 587 // Use the first entry's prefix. Must be a new-style package. 588 const char *prefixLimit=strchr(s, U_TREE_ENTRY_SEP_CHAR); 589 if(prefixLimit==NULL) { 590 fprintf(stderr, 591 "icupkg: --auto_toc_prefix[_with_type] but " 592 "the first entry \"%s\" does not contain a '%c'\n", 593 s, U_TREE_ENTRY_SEP_CHAR); 594 exit(U_INVALID_FORMAT_ERROR); 595 } 596 prefixLength=(int32_t)(prefixLimit-s); 597 if(prefixLength==0 || prefixLength>=LENGTHOF(pkgPrefix)) { 598 fprintf(stderr, 599 "icupkg: --auto_toc_prefix[_with_type] but " 600 "the prefix of the first entry \"%s\" is empty or too long\n", 601 s); 602 exit(U_INVALID_FORMAT_ERROR); 603 } 604 if(prefixEndsWithType && s[prefixLength-1]!=type) { 605 fprintf(stderr, 606 "icupkg: --auto_toc_prefix_with_type but " 607 "the prefix of the first entry \"%s\" does not end with '%c'\n", 608 s, type); 609 exit(U_INVALID_FORMAT_ERROR); 610 } 611 memcpy(pkgPrefix, s, prefixLength); 612 memcpy(prefix, s, ++prefixLength); // include the / 613 } else { 614 // Use the package basename as prefix. 615 int32_t inPkgNameLength=strlen(inPkgName); 616 memcpy(prefix, inPkgName, inPkgNameLength); 617 prefixLength=inPkgNameLength; 618 619 if( (int32_t)strlen(s)>=(inPkgNameLength+2) && 620 0==memcmp(s, inPkgName, inPkgNameLength) && 621 s[inPkgNameLength]=='_' 622 ) { 623 // old-style .dat package 624 prefix[prefixLength++]='_'; 625 } else { 626 // new-style .dat package 627 prefix[prefixLength++]=U_TREE_ENTRY_SEP_CHAR; 628 // if it turns out to not contain U_TREE_ENTRY_SEP_CHAR 629 // then the test in the loop below will fail 630 } 631 } 632 prefix[prefixLength]=0; 633 634 /* read the ToC table */ 635 for(i=0; i<itemCount; ++i) { 636 // skip the package part of the item name, error if it does not match the actual package name 637 // or if nothing follows the package name 638 offset=(int32_t)ds->readUInt32(inEntries[i].nameOffset)-stringsOffset; 639 s=inItemStrings+offset; 640 if(0!=strncmp(s, prefix, prefixLength) || s[prefixLength]==0) { 641 fprintf(stderr, "icupkg: input .dat item name \"%s\" does not start with \"%s\"\n", 642 s, prefix); 643 exit(U_INVALID_FORMAT_ERROR); 644 } 645 items[i].name=s+prefixLength; 646 647 // set the item's data 648 items[i].data=(uint8_t *)inBytes+ds->readUInt32(inEntries[i].dataOffset); 649 if(i>0) { 650 items[i-1].length=(int32_t)(items[i].data-items[i-1].data); 651 652 // set the previous item's platform type 653 typeEnum=getTypeEnumForInputData(items[i-1].data, items[i-1].length, &errorCode); 654 if(typeEnum<0 || U_FAILURE(errorCode)) { 655 fprintf(stderr, "icupkg: not an ICU data file: item \"%s\" in \"%s\"\n", items[i-1].name, filename); 656 exit(U_INVALID_FORMAT_ERROR); 657 } 658 items[i-1].type=makeTypeLetter(typeEnum); 659 } 660 items[i].isDataOwned=FALSE; 661 } 662 // set the last item's length 663 items[itemCount-1].length=length-ds->readUInt32(inEntries[itemCount-1].dataOffset); 664 665 // set the last item's platform type 666 typeEnum=getTypeEnumForInputData(items[itemCount-1].data, items[itemCount-1].length, &errorCode); 667 if(typeEnum<0 || U_FAILURE(errorCode)) { 668 fprintf(stderr, "icupkg: not an ICU data file: item \"%s\" in \"%s\"\n", items[i-1].name, filename); 669 exit(U_INVALID_FORMAT_ERROR); 670 } 671 items[itemCount-1].type=makeTypeLetter(typeEnum); 672 673 if(type!=U_ICUDATA_TYPE_LETTER[0]) { 674 // sort the item names for the local charset 675 sortItems(); 676 } 677 } 678 679 udata_closeSwapper(ds); 680} 681 682char 683Package::getInType() { 684 return makeTypeLetter(inCharset, inIsBigEndian); 685} 686 687void 688Package::writePackage(const char *filename, char outType, const char *comment) { 689 char prefix[MAX_PKG_NAME_LENGTH+4]; 690 UDataOffsetTOCEntry entry; 691 UDataSwapper *dsLocalToOut, *ds[TYPE_COUNT]; 692 FILE *file; 693 Item *pItem; 694 char *name; 695 UErrorCode errorCode; 696 int32_t i, length, prefixLength, maxItemLength, basenameOffset, offset, outInt32; 697 uint8_t outCharset; 698 UBool outIsBigEndian; 699 700 extractPackageName(filename, prefix, MAX_PKG_NAME_LENGTH); 701 702 // if there is an explicit comment, then use it, else use what's in the current header 703 if(comment!=NULL) { 704 /* get the header size minus the current comment */ 705 DataHeader *pHeader; 706 int32_t length; 707 708 pHeader=(DataHeader *)header; 709 headerLength=4+pHeader->info.size; 710 length=(int32_t)strlen(comment); 711 if((int32_t)(headerLength+length)>=(int32_t)sizeof(header)) { 712 fprintf(stderr, "icupkg: comment too long\n"); 713 exit(U_BUFFER_OVERFLOW_ERROR); 714 } 715 memcpy(header+headerLength, comment, length+1); 716 headerLength+=length; 717 if(headerLength&0xf) { 718 /* NUL-pad the header to a multiple of 16 */ 719 length=(headerLength+0xf)&~0xf; 720 memset(header+headerLength, 0, length-headerLength); 721 headerLength=length; 722 } 723 pHeader->dataHeader.headerSize=(uint16_t)headerLength; 724 } 725 726 makeTypeProps(outType, outCharset, outIsBigEndian); 727 728 // open (TYPE_COUNT-2) swappers 729 // one is a no-op for local type==outType 730 // one type (TYPE_LE) is bogus 731 errorCode=U_ZERO_ERROR; 732 i=makeTypeEnum(outType); 733 ds[TYPE_B]= i==TYPE_B ? NULL : udata_openSwapper(TRUE, U_ASCII_FAMILY, outIsBigEndian, outCharset, &errorCode); 734 ds[TYPE_L]= i==TYPE_L ? NULL : udata_openSwapper(FALSE, U_ASCII_FAMILY, outIsBigEndian, outCharset, &errorCode); 735 ds[TYPE_LE]=NULL; 736 ds[TYPE_E]= i==TYPE_E ? NULL : udata_openSwapper(TRUE, U_EBCDIC_FAMILY, outIsBigEndian, outCharset, &errorCode); 737 if(U_FAILURE(errorCode)) { 738 fprintf(stderr, "icupkg: udata_openSwapper() failed - %s\n", u_errorName(errorCode)); 739 exit(errorCode); 740 } 741 for(i=0; i<TYPE_COUNT; ++i) { 742 if(ds[i]!=NULL) { 743 ds[i]->printError=printPackageError; 744 ds[i]->printErrorContext=stderr; 745 } 746 } 747 748 dsLocalToOut=ds[makeTypeEnum(U_CHARSET_FAMILY, U_IS_BIG_ENDIAN)]; 749 750 // create the file and write its contents 751 file=fopen(filename, "wb"); 752 if(file==NULL) { 753 fprintf(stderr, "icupkg: unable to create file \"%s\"\n", filename); 754 exit(U_FILE_ACCESS_ERROR); 755 } 756 757 // swap and write the header 758 if(dsLocalToOut!=NULL) { 759 udata_swapDataHeader(dsLocalToOut, header, headerLength, header, &errorCode); 760 if(U_FAILURE(errorCode)) { 761 fprintf(stderr, "icupkg: udata_swapDataHeader(local to out) failed - %s\n", u_errorName(errorCode)); 762 exit(errorCode); 763 } 764 } 765 length=(int32_t)fwrite(header, 1, headerLength, file); 766 if(length!=headerLength) { 767 fprintf(stderr, "icupkg: unable to write complete header to file \"%s\"\n", filename); 768 exit(U_FILE_ACCESS_ERROR); 769 } 770 771 // prepare and swap the package name with a tree separator 772 // for prepending to item names 773 if(pkgPrefix[0]==0) { 774 prefixLength=(int32_t)strlen(prefix); 775 } else { 776 prefixLength=(int32_t)strlen(pkgPrefix); 777 memcpy(prefix, pkgPrefix, prefixLength); 778 if(prefixEndsWithType) { 779 prefix[prefixLength-1]=outType; 780 } 781 } 782 prefix[prefixLength++]=U_TREE_ENTRY_SEP_CHAR; 783 prefix[prefixLength]=0; 784 if(dsLocalToOut!=NULL) { 785 dsLocalToOut->swapInvChars(dsLocalToOut, prefix, prefixLength, prefix, &errorCode); 786 if(U_FAILURE(errorCode)) { 787 fprintf(stderr, "icupkg: swapInvChars(output package name) failed - %s\n", u_errorName(errorCode)); 788 exit(errorCode); 789 } 790 791 // swap and sort the item names (sorting needs to be done in the output charset) 792 dsLocalToOut->swapInvChars(dsLocalToOut, inStrings, inStringTop, inStrings, &errorCode); 793 if(U_FAILURE(errorCode)) { 794 fprintf(stderr, "icupkg: swapInvChars(item names) failed - %s\n", u_errorName(errorCode)); 795 exit(errorCode); 796 } 797 sortItems(); 798 } 799 800 // create the output item names in sorted order, with the package name prepended to each 801 for(i=0; i<itemCount; ++i) { 802 length=(int32_t)strlen(items[i].name); 803 name=allocString(FALSE, length+prefixLength); 804 memcpy(name, prefix, prefixLength); 805 memcpy(name+prefixLength, items[i].name, length+1); 806 items[i].name=name; 807 } 808 809 // calculate offsets for item names and items, pad to 16-align items 810 // align only the first item; each item's length is a multiple of 16 811 basenameOffset=4+8*itemCount; 812 offset=basenameOffset+outStringTop; 813 if((length=(offset&15))!=0) { 814 length=16-length; 815 memset(allocString(FALSE, length-1), 0xaa, length); 816 offset+=length; 817 } 818 819 // write the table of contents 820 // first the itemCount 821 outInt32=itemCount; 822 if(dsLocalToOut!=NULL) { 823 dsLocalToOut->swapArray32(dsLocalToOut, &outInt32, 4, &outInt32, &errorCode); 824 if(U_FAILURE(errorCode)) { 825 fprintf(stderr, "icupkg: swapArray32(item count) failed - %s\n", u_errorName(errorCode)); 826 exit(errorCode); 827 } 828 } 829 length=(int32_t)fwrite(&outInt32, 1, 4, file); 830 if(length!=4) { 831 fprintf(stderr, "icupkg: unable to write complete item count to file \"%s\"\n", filename); 832 exit(U_FILE_ACCESS_ERROR); 833 } 834 835 // then write the item entries (and collect the maxItemLength) 836 maxItemLength=0; 837 for(i=0; i<itemCount; ++i) { 838 entry.nameOffset=(uint32_t)(basenameOffset+(items[i].name-outStrings)); 839 entry.dataOffset=(uint32_t)offset; 840 if(dsLocalToOut!=NULL) { 841 dsLocalToOut->swapArray32(dsLocalToOut, &entry, 8, &entry, &errorCode); 842 if(U_FAILURE(errorCode)) { 843 fprintf(stderr, "icupkg: swapArray32(item entry %ld) failed - %s\n", (long)i, u_errorName(errorCode)); 844 exit(errorCode); 845 } 846 } 847 length=(int32_t)fwrite(&entry, 1, 8, file); 848 if(length!=8) { 849 fprintf(stderr, "icupkg: unable to write complete item entry %ld to file \"%s\"\n", (long)i, filename); 850 exit(U_FILE_ACCESS_ERROR); 851 } 852 853 length=items[i].length; 854 if(length>maxItemLength) { 855 maxItemLength=length; 856 } 857 offset+=length; 858 } 859 860 // write the item names 861 length=(int32_t)fwrite(outStrings, 1, outStringTop, file); 862 if(length!=outStringTop) { 863 fprintf(stderr, "icupkg: unable to write complete item names to file \"%s\"\n", filename); 864 exit(U_FILE_ACCESS_ERROR); 865 } 866 867 // write the items 868 for(pItem=items, i=0; i<itemCount; ++pItem, ++i) { 869 int32_t type=makeTypeEnum(pItem->type); 870 if(ds[type]!=NULL) { 871 // swap each item from its platform properties to the desired ones 872 udata_swap( 873 ds[type], 874 pItem->data, pItem->length, pItem->data, 875 &errorCode); 876 if(U_FAILURE(errorCode)) { 877 fprintf(stderr, "icupkg: udata_swap(item %ld) failed - %s\n", (long)i, u_errorName(errorCode)); 878 exit(errorCode); 879 } 880 } 881 length=(int32_t)fwrite(pItem->data, 1, pItem->length, file); 882 if(length!=pItem->length) { 883 fprintf(stderr, "icupkg: unable to write complete item %ld to file \"%s\"\n", (long)i, filename); 884 exit(U_FILE_ACCESS_ERROR); 885 } 886 } 887 888 if(ferror(file)) { 889 fprintf(stderr, "icupkg: unable to write complete file \"%s\"\n", filename); 890 exit(U_FILE_ACCESS_ERROR); 891 } 892 893 fclose(file); 894 for(i=0; i<TYPE_COUNT; ++i) { 895 udata_closeSwapper(ds[i]); 896 } 897} 898 899int32_t 900Package::findItem(const char *name, int32_t length) const { 901 int32_t i, start, limit; 902 int result; 903 904 /* do a binary search for the string */ 905 start=0; 906 limit=itemCount; 907 while(start<limit) { 908 i=(start+limit)/2; 909 if(length>=0) { 910 result=strncmp(name, items[i].name, length); 911 } else { 912 result=strcmp(name, items[i].name); 913 } 914 915 if(result==0) { 916 /* found */ 917 if(length>=0) { 918 /* 919 * if we compared just prefixes, then we may need to back up 920 * to the first item with this prefix 921 */ 922 while(i>0 && 0==strncmp(name, items[i-1].name, length)) { 923 --i; 924 } 925 } 926 return i; 927 } else if(result<0) { 928 limit=i; 929 } else /* result>0 */ { 930 start=i+1; 931 } 932 } 933 934 return ~start; /* not found, return binary-not of the insertion point */ 935} 936 937void 938Package::findItems(const char *pattern) { 939 const char *wild; 940 941 if(pattern==NULL || *pattern==0) { 942 findNextIndex=-1; 943 return; 944 } 945 946 findPrefix=pattern; 947 findSuffix=NULL; 948 findSuffixLength=0; 949 950 wild=strchr(pattern, '*'); 951 if(wild==NULL) { 952 // no wildcard 953 findPrefixLength=(int32_t)strlen(pattern); 954 } else { 955 // one wildcard 956 findPrefixLength=(int32_t)(wild-pattern); 957 findSuffix=wild+1; 958 findSuffixLength=(int32_t)strlen(findSuffix); 959 if(NULL!=strchr(findSuffix, '*')) { 960 // two or more wildcards 961 fprintf(stderr, "icupkg: syntax error (more than one '*') in item pattern \"%s\"\n", pattern); 962 exit(U_PARSE_ERROR); 963 } 964 } 965 966 if(findPrefixLength==0) { 967 findNextIndex=0; 968 } else { 969 findNextIndex=findItem(findPrefix, findPrefixLength); 970 } 971} 972 973int32_t 974Package::findNextItem() { 975 const char *name, *middle, *treeSep; 976 int32_t idx, nameLength, middleLength; 977 978 if(findNextIndex<0) { 979 return -1; 980 } 981 982 while(findNextIndex<itemCount) { 983 idx=findNextIndex++; 984 name=items[idx].name; 985 nameLength=(int32_t)strlen(name); 986 if(nameLength<(findPrefixLength+findSuffixLength)) { 987 // item name too short for prefix & suffix 988 continue; 989 } 990 if(findPrefixLength>0 && 0!=memcmp(findPrefix, name, findPrefixLength)) { 991 // left the range of names with this prefix 992 break; 993 } 994 middle=name+findPrefixLength; 995 middleLength=nameLength-findPrefixLength-findSuffixLength; 996 if(findSuffixLength>0 && 0!=memcmp(findSuffix, name+(nameLength-findSuffixLength), findSuffixLength)) { 997 // suffix does not match 998 continue; 999 } 1000 // prefix & suffix match 1001 1002 if(matchMode&MATCH_NOSLASH) { 1003 treeSep=strchr(middle, U_TREE_ENTRY_SEP_CHAR); 1004 if(treeSep!=NULL && (treeSep-middle)<middleLength) { 1005 // the middle (matching the * wildcard) contains a tree separator / 1006 continue; 1007 } 1008 } 1009 1010 // found a matching item 1011 return idx; 1012 } 1013 1014 // no more items 1015 findNextIndex=-1; 1016 return -1; 1017} 1018 1019void 1020Package::setMatchMode(uint32_t mode) { 1021 matchMode=mode; 1022} 1023 1024void 1025Package::addItem(const char *name) { 1026 addItem(name, NULL, 0, FALSE, U_ICUDATA_TYPE_LETTER[0]); 1027} 1028 1029void 1030Package::addItem(const char *name, uint8_t *data, int32_t length, UBool isDataOwned, char type) { 1031 int32_t idx; 1032 1033 idx=findItem(name); 1034 if(idx<0) { 1035 // new item, make space at the insertion point 1036 ensureItemCapacity(); 1037 // move the following items down 1038 idx=~idx; 1039 if(idx<itemCount) { 1040 memmove(items+idx+1, items+idx, (itemCount-idx)*sizeof(Item)); 1041 } 1042 ++itemCount; 1043 1044 // reset this Item entry 1045 memset(items+idx, 0, sizeof(Item)); 1046 1047 // copy the item's name 1048 items[idx].name=allocString(TRUE, strlen(name)); 1049 strcpy(items[idx].name, name); 1050 pathToTree(items[idx].name); 1051 } else { 1052 // same-name item found, replace it 1053 if(items[idx].isDataOwned) { 1054 free(items[idx].data); 1055 } 1056 1057 // keep the item's name since it is the same 1058 } 1059 1060 // set the item's data 1061 items[idx].data=data; 1062 items[idx].length=length; 1063 items[idx].isDataOwned=isDataOwned; 1064 items[idx].type=type; 1065} 1066 1067void 1068Package::addFile(const char *filesPath, const char *name) { 1069 uint8_t *data; 1070 int32_t length; 1071 char type; 1072 1073 data=readFile(filesPath, name, length, type); 1074 // readFile() exits the tool if it fails 1075 addItem(name, data, length, TRUE, type); 1076} 1077 1078void 1079Package::addItems(const Package &listPkg) { 1080 const Item *pItem; 1081 int32_t i; 1082 1083 for(pItem=listPkg.items, i=0; i<listPkg.itemCount; ++pItem, ++i) { 1084 addItem(pItem->name, pItem->data, pItem->length, FALSE, pItem->type); 1085 } 1086} 1087 1088void 1089Package::removeItem(int32_t idx) { 1090 if(idx>=0) { 1091 // remove the item 1092 if(items[idx].isDataOwned) { 1093 free(items[idx].data); 1094 } 1095 1096 // move the following items up 1097 if((idx+1)<itemCount) { 1098 memmove(items+idx, items+idx+1, (itemCount-(idx+1))*sizeof(Item)); 1099 } 1100 --itemCount; 1101 1102 if(idx<=findNextIndex) { 1103 --findNextIndex; 1104 } 1105 } 1106} 1107 1108void 1109Package::removeItems(const char *pattern) { 1110 int32_t idx; 1111 1112 findItems(pattern); 1113 while((idx=findNextItem())>=0) { 1114 removeItem(idx); 1115 } 1116} 1117 1118void 1119Package::removeItems(const Package &listPkg) { 1120 const Item *pItem; 1121 int32_t i; 1122 1123 for(pItem=listPkg.items, i=0; i<listPkg.itemCount; ++pItem, ++i) { 1124 removeItems(pItem->name); 1125 } 1126} 1127 1128void 1129Package::extractItem(const char *filesPath, const char *outName, int32_t idx, char outType) { 1130 char filename[1024]; 1131 UDataSwapper *ds; 1132 FILE *file; 1133 Item *pItem; 1134 int32_t fileLength; 1135 uint8_t itemCharset, outCharset; 1136 UBool itemIsBigEndian, outIsBigEndian; 1137 1138 if(idx<0 || itemCount<=idx) { 1139 return; 1140 } 1141 pItem=items+idx; 1142 1143 // swap the data to the outType 1144 // outType==0: don't swap 1145 if(outType!=0 && pItem->type!=outType) { 1146 // open the swapper 1147 UErrorCode errorCode=U_ZERO_ERROR; 1148 makeTypeProps(pItem->type, itemCharset, itemIsBigEndian); 1149 makeTypeProps(outType, outCharset, outIsBigEndian); 1150 ds=udata_openSwapper(itemIsBigEndian, itemCharset, outIsBigEndian, outCharset, &errorCode); 1151 if(U_FAILURE(errorCode)) { 1152 fprintf(stderr, "icupkg: udata_openSwapper(item %ld) failed - %s\n", 1153 (long)idx, u_errorName(errorCode)); 1154 exit(errorCode); 1155 } 1156 1157 ds->printError=printPackageError; 1158 ds->printErrorContext=stderr; 1159 1160 // swap the item from its platform properties to the desired ones 1161 udata_swap(ds, pItem->data, pItem->length, pItem->data, &errorCode); 1162 if(U_FAILURE(errorCode)) { 1163 fprintf(stderr, "icupkg: udata_swap(item %ld) failed - %s\n", (long)idx, u_errorName(errorCode)); 1164 exit(errorCode); 1165 } 1166 udata_closeSwapper(ds); 1167 pItem->type=outType; 1168 } 1169 1170 // create the file and write its contents 1171 makeFullFilenameAndDirs(filesPath, outName, filename, (int32_t)sizeof(filename)); 1172 file=fopen(filename, "wb"); 1173 if(file==NULL) { 1174 fprintf(stderr, "icupkg: unable to create file \"%s\"\n", filename); 1175 exit(U_FILE_ACCESS_ERROR); 1176 } 1177 fileLength=(int32_t)fwrite(pItem->data, 1, pItem->length, file); 1178 1179 if(ferror(file) || fileLength!=pItem->length) { 1180 fprintf(stderr, "icupkg: unable to write complete file \"%s\"\n", filename); 1181 exit(U_FILE_ACCESS_ERROR); 1182 } 1183 fclose(file); 1184} 1185 1186void 1187Package::extractItem(const char *filesPath, int32_t idx, char outType) { 1188 extractItem(filesPath, items[idx].name, idx, outType); 1189} 1190 1191void 1192Package::extractItems(const char *filesPath, const char *pattern, char outType) { 1193 int32_t idx; 1194 1195 findItems(pattern); 1196 while((idx=findNextItem())>=0) { 1197 extractItem(filesPath, idx, outType); 1198 } 1199} 1200 1201void 1202Package::extractItems(const char *filesPath, const Package &listPkg, char outType) { 1203 const Item *pItem; 1204 int32_t i; 1205 1206 for(pItem=listPkg.items, i=0; i<listPkg.itemCount; ++pItem, ++i) { 1207 extractItems(filesPath, pItem->name, outType); 1208 } 1209} 1210 1211int32_t 1212Package::getItemCount() const { 1213 return itemCount; 1214} 1215 1216const Item * 1217Package::getItem(int32_t idx) const { 1218 if (0 <= idx && idx < itemCount) { 1219 return &items[idx]; 1220 } 1221 return NULL; 1222} 1223 1224void 1225Package::checkDependency(void *context, const char *itemName, const char *targetName) { 1226 // check dependency: make sure the target item is in the package 1227 Package *me=(Package *)context; 1228 if(me->findItem(targetName)<0) { 1229 me->isMissingItems=TRUE; 1230 fprintf(stderr, "Item %s depends on missing item %s\n", itemName, targetName); 1231 } 1232} 1233 1234UBool 1235Package::checkDependencies() { 1236 isMissingItems=FALSE; 1237 enumDependencies(this, checkDependency); 1238 return (UBool)!isMissingItems; 1239} 1240 1241void 1242Package::enumDependencies(void *context, CheckDependency check) { 1243 int32_t i; 1244 1245 for(i=0; i<itemCount; ++i) { 1246 enumDependencies(items+i, context, check); 1247 } 1248} 1249 1250char * 1251Package::allocString(UBool in, int32_t length) { 1252 char *p; 1253 int32_t top; 1254 1255 if(in) { 1256 top=inStringTop; 1257 p=inStrings+top; 1258 } else { 1259 top=outStringTop; 1260 p=outStrings+top; 1261 } 1262 top+=length+1; 1263 1264 if(top>STRING_STORE_SIZE) { 1265 fprintf(stderr, "icupkg: string storage overflow\n"); 1266 exit(U_BUFFER_OVERFLOW_ERROR); 1267 } 1268 if(in) { 1269 inStringTop=top; 1270 } else { 1271 outStringTop=top; 1272 } 1273 return p; 1274} 1275 1276void 1277Package::sortItems() { 1278 UErrorCode errorCode=U_ZERO_ERROR; 1279 uprv_sortArray(items, itemCount, (int32_t)sizeof(Item), compareItems, NULL, FALSE, &errorCode); 1280 if(U_FAILURE(errorCode)) { 1281 fprintf(stderr, "icupkg: sorting item names failed - %s\n", u_errorName(errorCode)); 1282 exit(errorCode); 1283 } 1284} 1285 1286void Package::setItemCapacity(int32_t max) 1287{ 1288 if(max<=itemMax) { 1289 return; 1290 } 1291 Item *newItems = (Item*)uprv_malloc(max * sizeof(items[0])); 1292 Item *oldItems = items; 1293 if(newItems == NULL) { 1294 fprintf(stderr, "icupkg: Out of memory trying to allocate %lu bytes for %d items\n", 1295 (unsigned long)max*sizeof(items[0]), max); 1296 exit(U_MEMORY_ALLOCATION_ERROR); 1297 } 1298 if(items && itemCount>0) { 1299 uprv_memcpy(newItems, items, itemCount*sizeof(items[0])); 1300 } 1301 itemMax = max; 1302 items = newItems; 1303 uprv_free(oldItems); 1304} 1305 1306void Package::ensureItemCapacity() 1307{ 1308 if((itemCount+1)>itemMax) { 1309 setItemCapacity(itemCount+kItemsChunk); 1310 } 1311} 1312 1313U_NAMESPACE_END 1314