1/* 2******************************************************************************* 3* 4* Copyright (C) 1999-2013, International Business Machines 5* Corporation and others. All Rights Reserved. 6* 7******************************************************************************* 8* file name: package.cpp 9* encoding: US-ASCII 10* tab size: 8 (not used) 11* indentation:4 12* 13* created on: 2005aug25 14* created by: Markus W. Scherer 15* 16* Read, modify, and write ICU .dat data package files. 17* This is an integral part of the icupkg tool, moved to the toolutil library 18* because parts of tool implementations tend to be later shared by 19* other tools. 20* Subsumes functionality and implementation code from 21* gencmn, decmn, and icuswap tools. 22*/ 23 24#include "unicode/utypes.h" 25#include "unicode/putil.h" 26#include "unicode/udata.h" 27#include "cstring.h" 28#include "uarrsort.h" 29#include "ucmndata.h" 30#include "udataswp.h" 31#include "swapimpl.h" 32#include "toolutil.h" 33#include "package.h" 34#include "cmemory.h" 35 36#include <stdio.h> 37#include <stdlib.h> 38#include <string.h> 39 40 41static const int32_t kItemsChunk = 256; /* How much to increase the filesarray by each time */ 42 43// general definitions ----------------------------------------------------- *** 44 45#define LENGTHOF(array) (int32_t)(sizeof(array)/sizeof((array)[0])) 46 47/* UDataInfo cf. udata.h */ 48static const UDataInfo dataInfo={ 49 (uint16_t)sizeof(UDataInfo), 50 0, 51 52 U_IS_BIG_ENDIAN, 53 U_CHARSET_FAMILY, 54 (uint8_t)sizeof(UChar), 55 0, 56 57 {0x43, 0x6d, 0x6e, 0x44}, /* dataFormat="CmnD" */ 58 {1, 0, 0, 0}, /* formatVersion */ 59 {3, 0, 0, 0} /* dataVersion */ 60}; 61 62U_CDECL_BEGIN 63static void U_CALLCONV 64printPackageError(void *context, const char *fmt, va_list args) { 65 vfprintf((FILE *)context, fmt, args); 66} 67U_CDECL_END 68 69static uint16_t 70readSwapUInt16(uint16_t x) { 71 return (uint16_t)((x<<8)|(x>>8)); 72} 73 74// platform types ---------------------------------------------------------- *** 75 76static const char *types="lb?e"; 77 78enum { TYPE_L, TYPE_B, TYPE_LE, TYPE_E, TYPE_COUNT }; 79 80static inline int32_t 81makeTypeEnum(uint8_t charset, UBool isBigEndian) { 82 return 2*(int32_t)charset+isBigEndian; 83} 84 85static inline int32_t 86makeTypeEnum(char type) { 87 return 88 type == 'l' ? TYPE_L : 89 type == 'b' ? TYPE_B : 90 type == 'e' ? TYPE_E : 91 -1; 92} 93 94static inline char 95makeTypeLetter(uint8_t charset, UBool isBigEndian) { 96 return types[makeTypeEnum(charset, isBigEndian)]; 97} 98 99static inline char 100makeTypeLetter(int32_t typeEnum) { 101 return types[typeEnum]; 102} 103 104static void 105makeTypeProps(char type, uint8_t &charset, UBool &isBigEndian) { 106 int32_t typeEnum=makeTypeEnum(type); 107 charset=(uint8_t)(typeEnum>>1); 108 isBigEndian=(UBool)(typeEnum&1); 109} 110 111U_CFUNC const UDataInfo * 112getDataInfo(const uint8_t *data, int32_t length, 113 int32_t &infoLength, int32_t &headerLength, 114 UErrorCode *pErrorCode) { 115 const DataHeader *pHeader; 116 const UDataInfo *pInfo; 117 118 if(pErrorCode==NULL || U_FAILURE(*pErrorCode)) { 119 return NULL; 120 } 121 if( data==NULL || 122 (length>=0 && length<(int32_t)sizeof(DataHeader)) 123 ) { 124 *pErrorCode=U_ILLEGAL_ARGUMENT_ERROR; 125 return NULL; 126 } 127 128 pHeader=(const DataHeader *)data; 129 pInfo=&pHeader->info; 130 if( (length>=0 && length<(int32_t)sizeof(DataHeader)) || 131 pHeader->dataHeader.magic1!=0xda || 132 pHeader->dataHeader.magic2!=0x27 || 133 pInfo->sizeofUChar!=2 134 ) { 135 *pErrorCode=U_UNSUPPORTED_ERROR; 136 return NULL; 137 } 138 139 if(pInfo->isBigEndian==U_IS_BIG_ENDIAN) { 140 headerLength=pHeader->dataHeader.headerSize; 141 infoLength=pInfo->size; 142 } else { 143 headerLength=readSwapUInt16(pHeader->dataHeader.headerSize); 144 infoLength=readSwapUInt16(pInfo->size); 145 } 146 147 if( headerLength<(int32_t)sizeof(DataHeader) || 148 infoLength<(int32_t)sizeof(UDataInfo) || 149 headerLength<(int32_t)(sizeof(pHeader->dataHeader)+infoLength) || 150 (length>=0 && length<headerLength) 151 ) { 152 *pErrorCode=U_UNSUPPORTED_ERROR; 153 return NULL; 154 } 155 156 return pInfo; 157} 158 159static int32_t 160getTypeEnumForInputData(const uint8_t *data, int32_t length, 161 UErrorCode *pErrorCode) { 162 const UDataInfo *pInfo; 163 int32_t infoLength, headerLength; 164 165 /* getDataInfo() checks for illegal arguments */ 166 pInfo=getDataInfo(data, length, infoLength, headerLength, pErrorCode); 167 if(pInfo==NULL) { 168 return -1; 169 } 170 171 return makeTypeEnum(pInfo->charsetFamily, (UBool)pInfo->isBigEndian); 172} 173 174// file handling ----------------------------------------------------------- *** 175 176static void 177extractPackageName(const char *filename, 178 char pkg[], int32_t capacity) { 179 const char *basename; 180 int32_t len; 181 182 basename=findBasename(filename); 183 len=(int32_t)strlen(basename)-4; /* -4: subtract the length of ".dat" */ 184 185 if(len<=0 || 0!=strcmp(basename+len, ".dat")) { 186 fprintf(stderr, "icupkg: \"%s\" is not recognized as a package filename (must end with .dat)\n", 187 basename); 188 exit(U_ILLEGAL_ARGUMENT_ERROR); 189 } 190 191 if(len>=capacity) { 192 fprintf(stderr, "icupkg: the package name \"%s\" is too long (>=%ld)\n", 193 basename, (long)capacity); 194 exit(U_ILLEGAL_ARGUMENT_ERROR); 195 } 196 197 memcpy(pkg, basename, len); 198 pkg[len]=0; 199} 200 201static int32_t 202getFileLength(FILE *f) { 203 int32_t length; 204 205 fseek(f, 0, SEEK_END); 206 length=(int32_t)ftell(f); 207 fseek(f, 0, SEEK_SET); 208 return length; 209} 210 211/* 212 * Turn tree separators and alternate file separators into normal file separators. 213 */ 214#if U_TREE_ENTRY_SEP_CHAR==U_FILE_SEP_CHAR && U_FILE_ALT_SEP_CHAR==U_FILE_SEP_CHAR 215#define treeToPath(s) 216#else 217static void 218treeToPath(char *s) { 219 char *t; 220 221 for(t=s; *t!=0; ++t) { 222 if(*t==U_TREE_ENTRY_SEP_CHAR || *t==U_FILE_ALT_SEP_CHAR) { 223 *t=U_FILE_SEP_CHAR; 224 } 225 } 226} 227#endif 228 229/* 230 * Turn file separators into tree separators. 231 */ 232#if U_TREE_ENTRY_SEP_CHAR==U_FILE_SEP_CHAR && U_FILE_ALT_SEP_CHAR==U_FILE_SEP_CHAR 233#define pathToTree(s) 234#else 235static void 236pathToTree(char *s) { 237 char *t; 238 239 for(t=s; *t!=0; ++t) { 240 if(*t==U_FILE_SEP_CHAR || *t==U_FILE_ALT_SEP_CHAR) { 241 *t=U_TREE_ENTRY_SEP_CHAR; 242 } 243 } 244} 245#endif 246 247/* 248 * Prepend the path (if any) to the name and run the name through treeToName(). 249 */ 250static void 251makeFullFilename(const char *path, const char *name, 252 char *filename, int32_t capacity) { 253 char *s; 254 255 // prepend the path unless NULL or empty 256 if(path!=NULL && path[0]!=0) { 257 if((int32_t)(strlen(path)+1)>=capacity) { 258 fprintf(stderr, "pathname too long: \"%s\"\n", path); 259 exit(U_BUFFER_OVERFLOW_ERROR); 260 } 261 strcpy(filename, path); 262 263 // make sure the path ends with a file separator 264 s=strchr(filename, 0); 265 if(*(s-1)!=U_FILE_SEP_CHAR && *(s-1)!=U_FILE_ALT_SEP_CHAR) { 266 *s++=U_FILE_SEP_CHAR; 267 } 268 } else { 269 s=filename; 270 } 271 272 // turn the name into a filename, turn tree separators into file separators 273 if((int32_t)((s-filename)+strlen(name))>=capacity) { 274 fprintf(stderr, "path/filename too long: \"%s%s\"\n", filename, name); 275 exit(U_BUFFER_OVERFLOW_ERROR); 276 } 277 strcpy(s, name); 278 treeToPath(s); 279} 280 281static void 282makeFullFilenameAndDirs(const char *path, const char *name, 283 char *filename, int32_t capacity) { 284 char *sep; 285 UErrorCode errorCode; 286 287 makeFullFilename(path, name, filename, capacity); 288 289 // make tree directories 290 errorCode=U_ZERO_ERROR; 291 sep=strchr(filename, 0)-strlen(name); 292 while((sep=strchr(sep, U_FILE_SEP_CHAR))!=NULL) { 293 if(sep!=filename) { 294 *sep=0; // truncate temporarily 295 uprv_mkdir(filename, &errorCode); 296 if(U_FAILURE(errorCode)) { 297 fprintf(stderr, "icupkg: unable to create tree directory \"%s\"\n", filename); 298 exit(U_FILE_ACCESS_ERROR); 299 } 300 } 301 *sep++=U_FILE_SEP_CHAR; // restore file separator character 302 } 303} 304 305static uint8_t * 306readFile(const char *path, const char *name, int32_t &length, char &type) { 307 char filename[1024]; 308 FILE *file; 309 uint8_t *data; 310 UErrorCode errorCode; 311 int32_t fileLength, typeEnum; 312 313 makeFullFilename(path, name, filename, (int32_t)sizeof(filename)); 314 315 /* open the input file, get its length, allocate memory for it, read the file */ 316 file=fopen(filename, "rb"); 317 if(file==NULL) { 318 fprintf(stderr, "icupkg: unable to open input file \"%s\"\n", filename); 319 exit(U_FILE_ACCESS_ERROR); 320 } 321 322 /* get the file length */ 323 fileLength=getFileLength(file); 324 if(ferror(file) || fileLength<=0) { 325 fprintf(stderr, "icupkg: empty input file \"%s\"\n", filename); 326 fclose(file); 327 exit(U_FILE_ACCESS_ERROR); 328 } 329 330 /* allocate the buffer, pad to multiple of 16 */ 331 length=(fileLength+0xf)&~0xf; 332 data=(uint8_t *)uprv_malloc(length); 333 if(data==NULL) { 334 fclose(file); 335 fprintf(stderr, "icupkg: malloc error allocating %d bytes.\n", (int)length); 336 exit(U_MEMORY_ALLOCATION_ERROR); 337 } 338 339 /* read the file */ 340 if(fileLength!=(int32_t)fread(data, 1, fileLength, file)) { 341 fprintf(stderr, "icupkg: error reading \"%s\"\n", filename); 342 fclose(file); 343 free(data); 344 exit(U_FILE_ACCESS_ERROR); 345 } 346 347 /* pad the file to a multiple of 16 using the usual padding byte */ 348 if(fileLength<length) { 349 memset(data+fileLength, 0xaa, length-fileLength); 350 } 351 352 fclose(file); 353 354 // minimum check for ICU-format data 355 errorCode=U_ZERO_ERROR; 356 typeEnum=getTypeEnumForInputData(data, length, &errorCode); 357 if(typeEnum<0 || U_FAILURE(errorCode)) { 358 fprintf(stderr, "icupkg: not an ICU data file: \"%s\"\n", filename); 359 free(data); 360#if !UCONFIG_NO_LEGACY_CONVERSION 361 exit(U_INVALID_FORMAT_ERROR); 362#else 363 fprintf(stderr, "U_INVALID_FORMAT_ERROR occurred but UCONFIG_NO_LEGACY_CONVERSION is on so this is expected.\n"); 364 exit(0); 365#endif 366 } 367 type=makeTypeLetter(typeEnum); 368 369 return data; 370} 371 372// .dat package file representation ---------------------------------------- *** 373 374U_CDECL_BEGIN 375 376static int32_t U_CALLCONV 377compareItems(const void * /*context*/, const void *left, const void *right) { 378 U_NAMESPACE_USE 379 380 return (int32_t)strcmp(((Item *)left)->name, ((Item *)right)->name); 381} 382 383U_CDECL_END 384 385U_NAMESPACE_BEGIN 386 387Package::Package() 388 : doAutoPrefix(FALSE), prefixEndsWithType(FALSE) { 389 inPkgName[0]=0; 390 pkgPrefix[0]=0; 391 inData=NULL; 392 inLength=0; 393 inCharset=U_CHARSET_FAMILY; 394 inIsBigEndian=U_IS_BIG_ENDIAN; 395 396 itemCount=0; 397 itemMax=0; 398 items=NULL; 399 400 inStringTop=outStringTop=0; 401 402 matchMode=0; 403 findPrefix=findSuffix=NULL; 404 findPrefixLength=findSuffixLength=0; 405 findNextIndex=-1; 406 407 // create a header for an empty package 408 DataHeader *pHeader; 409 pHeader=(DataHeader *)header; 410 pHeader->dataHeader.magic1=0xda; 411 pHeader->dataHeader.magic2=0x27; 412 memcpy(&pHeader->info, &dataInfo, sizeof(dataInfo)); 413 headerLength=(int32_t)(4+sizeof(dataInfo)); 414 if(headerLength&0xf) { 415 /* NUL-pad the header to a multiple of 16 */ 416 int32_t length=(headerLength+0xf)&~0xf; 417 memset(header+headerLength, 0, length-headerLength); 418 headerLength=length; 419 } 420 pHeader->dataHeader.headerSize=(uint16_t)headerLength; 421} 422 423Package::~Package() { 424 int32_t idx; 425 426 free(inData); 427 428 for(idx=0; idx<itemCount; ++idx) { 429 if(items[idx].isDataOwned) { 430 free(items[idx].data); 431 } 432 } 433 434 uprv_free((void*)items); 435} 436 437void 438Package::setPrefix(const char *p) { 439 if(strlen(p)>=sizeof(pkgPrefix)) { 440 fprintf(stderr, "icupkg: --toc_prefix %s too long\n", p); 441 exit(U_ILLEGAL_ARGUMENT_ERROR); 442 } 443 strcpy(pkgPrefix, p); 444} 445 446void 447Package::readPackage(const char *filename) { 448 UDataSwapper *ds; 449 const UDataInfo *pInfo; 450 UErrorCode errorCode; 451 452 const uint8_t *inBytes; 453 454 int32_t length, offset, i; 455 int32_t itemLength, typeEnum; 456 char type; 457 458 const UDataOffsetTOCEntry *inEntries; 459 460 extractPackageName(filename, inPkgName, (int32_t)sizeof(inPkgName)); 461 462 /* read the file */ 463 inData=readFile(NULL, filename, inLength, type); 464 length=inLength; 465 466 /* 467 * swap the header - even if the swapping itself is a no-op 468 * because it tells us the header length 469 */ 470 errorCode=U_ZERO_ERROR; 471 makeTypeProps(type, inCharset, inIsBigEndian); 472 ds=udata_openSwapper(inIsBigEndian, inCharset, U_IS_BIG_ENDIAN, U_CHARSET_FAMILY, &errorCode); 473 if(U_FAILURE(errorCode)) { 474 fprintf(stderr, "icupkg: udata_openSwapper(\"%s\") failed - %s\n", 475 filename, u_errorName(errorCode)); 476 exit(errorCode); 477 } 478 479 ds->printError=printPackageError; 480 ds->printErrorContext=stderr; 481 482 headerLength=sizeof(header); 483 if(length<headerLength) { 484 headerLength=length; 485 } 486 headerLength=udata_swapDataHeader(ds, inData, headerLength, header, &errorCode); 487 if(U_FAILURE(errorCode)) { 488 exit(errorCode); 489 } 490 491 /* check data format and format version */ 492 pInfo=(const UDataInfo *)((const char *)inData+4); 493 if(!( 494 pInfo->dataFormat[0]==0x43 && /* dataFormat="CmnD" */ 495 pInfo->dataFormat[1]==0x6d && 496 pInfo->dataFormat[2]==0x6e && 497 pInfo->dataFormat[3]==0x44 && 498 pInfo->formatVersion[0]==1 499 )) { 500 fprintf(stderr, "icupkg: data format %02x.%02x.%02x.%02x (format version %02x) is not recognized as an ICU .dat package\n", 501 pInfo->dataFormat[0], pInfo->dataFormat[1], 502 pInfo->dataFormat[2], pInfo->dataFormat[3], 503 pInfo->formatVersion[0]); 504 exit(U_UNSUPPORTED_ERROR); 505 } 506 inIsBigEndian=(UBool)pInfo->isBigEndian; 507 inCharset=pInfo->charsetFamily; 508 509 inBytes=(const uint8_t *)inData+headerLength; 510 inEntries=(const UDataOffsetTOCEntry *)(inBytes+4); 511 512 /* check that the itemCount fits, then the ToC table, then at least the header of the last item */ 513 length-=headerLength; 514 if(length<4) { 515 /* itemCount does not fit */ 516 offset=0x7fffffff; 517 } else { 518 itemCount=udata_readInt32(ds, *(const int32_t *)inBytes); 519 setItemCapacity(itemCount); /* resize so there's space */ 520 if(itemCount==0) { 521 offset=4; 522 } else if(length<(4+8*itemCount)) { 523 /* ToC table does not fit */ 524 offset=0x7fffffff; 525 } else { 526 /* offset of the last item plus at least 20 bytes for its header */ 527 offset=20+(int32_t)ds->readUInt32(inEntries[itemCount-1].dataOffset); 528 } 529 } 530 if(length<offset) { 531 fprintf(stderr, "icupkg: too few bytes (%ld after header) for a .dat package\n", 532 (long)length); 533 exit(U_INDEX_OUTOFBOUNDS_ERROR); 534 } 535 /* do not modify the package length variable until the last item's length is set */ 536 537 if(itemCount<=0) { 538 if(doAutoPrefix) { 539 fprintf(stderr, "icupkg: --auto_toc_prefix[_with_type] but the input package is empty\n"); 540 exit(U_INVALID_FORMAT_ERROR); 541 } 542 } else { 543 char prefix[MAX_PKG_NAME_LENGTH+4]; 544 char *s, *inItemStrings; 545 546 if(itemCount>itemMax) { 547 fprintf(stderr, "icupkg: too many items, maximum is %d\n", itemMax); 548 exit(U_BUFFER_OVERFLOW_ERROR); 549 } 550 551 /* swap the item name strings */ 552 int32_t stringsOffset=4+8*itemCount; 553 itemLength=(int32_t)(ds->readUInt32(inEntries[0].dataOffset))-stringsOffset; 554 555 // don't include padding bytes at the end of the item names 556 while(itemLength>0 && inBytes[stringsOffset+itemLength-1]!=0) { 557 --itemLength; 558 } 559 560 if((inStringTop+itemLength)>STRING_STORE_SIZE) { 561 fprintf(stderr, "icupkg: total length of item name strings too long\n"); 562 exit(U_BUFFER_OVERFLOW_ERROR); 563 } 564 565 inItemStrings=inStrings+inStringTop; 566 ds->swapInvChars(ds, inBytes+stringsOffset, itemLength, inItemStrings, &errorCode); 567 if(U_FAILURE(errorCode)) { 568 fprintf(stderr, "icupkg failed to swap the input .dat package item name strings\n"); 569 exit(U_INVALID_FORMAT_ERROR); 570 } 571 inStringTop+=itemLength; 572 573 // reset the Item entries 574 memset(items, 0, itemCount*sizeof(Item)); 575 576 /* 577 * Get the common prefix of the items. 578 * New-style ICU .dat packages use tree separators ('/') between package names, 579 * tree names, and item names, 580 * while old-style ICU .dat packages (before multi-tree support) 581 * use an underscore ('_') between package and item names. 582 */ 583 offset=(int32_t)ds->readUInt32(inEntries[0].nameOffset)-stringsOffset; 584 s=inItemStrings+offset; // name of the first entry 585 int32_t prefixLength; 586 if(doAutoPrefix) { 587 // Use the first entry's prefix. Must be a new-style package. 588 const char *prefixLimit=strchr(s, U_TREE_ENTRY_SEP_CHAR); 589 if(prefixLimit==NULL) { 590 fprintf(stderr, 591 "icupkg: --auto_toc_prefix[_with_type] but " 592 "the first entry \"%s\" does not contain a '%c'\n", 593 s, U_TREE_ENTRY_SEP_CHAR); 594 exit(U_INVALID_FORMAT_ERROR); 595 } 596 prefixLength=(int32_t)(prefixLimit-s); 597 if(prefixLength==0 || prefixLength>=LENGTHOF(pkgPrefix)) { 598 fprintf(stderr, 599 "icupkg: --auto_toc_prefix[_with_type] but " 600 "the prefix of the first entry \"%s\" is empty or too long\n", 601 s); 602 exit(U_INVALID_FORMAT_ERROR); 603 } 604 if(prefixEndsWithType && s[prefixLength-1]!=type) { 605 fprintf(stderr, 606 "icupkg: --auto_toc_prefix_with_type but " 607 "the prefix of the first entry \"%s\" does not end with '%c'\n", 608 s, type); 609 exit(U_INVALID_FORMAT_ERROR); 610 } 611 memcpy(pkgPrefix, s, prefixLength); 612 pkgPrefix[prefixLength]=0; 613 memcpy(prefix, s, ++prefixLength); // include the / 614 } else { 615 // Use the package basename as prefix. 616 int32_t inPkgNameLength=strlen(inPkgName); 617 memcpy(prefix, inPkgName, inPkgNameLength); 618 prefixLength=inPkgNameLength; 619 620 if( (int32_t)strlen(s)>=(inPkgNameLength+2) && 621 0==memcmp(s, inPkgName, inPkgNameLength) && 622 s[inPkgNameLength]=='_' 623 ) { 624 // old-style .dat package 625 prefix[prefixLength++]='_'; 626 } else { 627 // new-style .dat package 628 prefix[prefixLength++]=U_TREE_ENTRY_SEP_CHAR; 629 // if it turns out to not contain U_TREE_ENTRY_SEP_CHAR 630 // then the test in the loop below will fail 631 } 632 } 633 prefix[prefixLength]=0; 634 635 /* read the ToC table */ 636 for(i=0; i<itemCount; ++i) { 637 // skip the package part of the item name, error if it does not match the actual package name 638 // or if nothing follows the package name 639 offset=(int32_t)ds->readUInt32(inEntries[i].nameOffset)-stringsOffset; 640 s=inItemStrings+offset; 641 if(0!=strncmp(s, prefix, prefixLength) || s[prefixLength]==0) { 642 fprintf(stderr, "icupkg: input .dat item name \"%s\" does not start with \"%s\"\n", 643 s, prefix); 644 exit(U_INVALID_FORMAT_ERROR); 645 } 646 items[i].name=s+prefixLength; 647 648 // set the item's data 649 items[i].data=(uint8_t *)inBytes+ds->readUInt32(inEntries[i].dataOffset); 650 if(i>0) { 651 items[i-1].length=(int32_t)(items[i].data-items[i-1].data); 652 653 // set the previous item's platform type 654 typeEnum=getTypeEnumForInputData(items[i-1].data, items[i-1].length, &errorCode); 655 if(typeEnum<0 || U_FAILURE(errorCode)) { 656 fprintf(stderr, "icupkg: not an ICU data file: item \"%s\" in \"%s\"\n", items[i-1].name, filename); 657 exit(U_INVALID_FORMAT_ERROR); 658 } 659 items[i-1].type=makeTypeLetter(typeEnum); 660 } 661 items[i].isDataOwned=FALSE; 662 } 663 // set the last item's length 664 items[itemCount-1].length=length-ds->readUInt32(inEntries[itemCount-1].dataOffset); 665 666 // set the last item's platform type 667 typeEnum=getTypeEnumForInputData(items[itemCount-1].data, items[itemCount-1].length, &errorCode); 668 if(typeEnum<0 || U_FAILURE(errorCode)) { 669 fprintf(stderr, "icupkg: not an ICU data file: item \"%s\" in \"%s\"\n", items[i-1].name, filename); 670 exit(U_INVALID_FORMAT_ERROR); 671 } 672 items[itemCount-1].type=makeTypeLetter(typeEnum); 673 674 if(type!=U_ICUDATA_TYPE_LETTER[0]) { 675 // sort the item names for the local charset 676 sortItems(); 677 } 678 } 679 680 udata_closeSwapper(ds); 681} 682 683char 684Package::getInType() { 685 return makeTypeLetter(inCharset, inIsBigEndian); 686} 687 688void 689Package::writePackage(const char *filename, char outType, const char *comment) { 690 char prefix[MAX_PKG_NAME_LENGTH+4]; 691 UDataOffsetTOCEntry entry; 692 UDataSwapper *dsLocalToOut, *ds[TYPE_COUNT]; 693 FILE *file; 694 Item *pItem; 695 char *name; 696 UErrorCode errorCode; 697 int32_t i, length, prefixLength, maxItemLength, basenameOffset, offset, outInt32; 698 uint8_t outCharset; 699 UBool outIsBigEndian; 700 701 extractPackageName(filename, prefix, MAX_PKG_NAME_LENGTH); 702 703 // if there is an explicit comment, then use it, else use what's in the current header 704 if(comment!=NULL) { 705 /* get the header size minus the current comment */ 706 DataHeader *pHeader; 707 int32_t length; 708 709 pHeader=(DataHeader *)header; 710 headerLength=4+pHeader->info.size; 711 length=(int32_t)strlen(comment); 712 if((int32_t)(headerLength+length)>=(int32_t)sizeof(header)) { 713 fprintf(stderr, "icupkg: comment too long\n"); 714 exit(U_BUFFER_OVERFLOW_ERROR); 715 } 716 memcpy(header+headerLength, comment, length+1); 717 headerLength+=length; 718 if(headerLength&0xf) { 719 /* NUL-pad the header to a multiple of 16 */ 720 length=(headerLength+0xf)&~0xf; 721 memset(header+headerLength, 0, length-headerLength); 722 headerLength=length; 723 } 724 pHeader->dataHeader.headerSize=(uint16_t)headerLength; 725 } 726 727 makeTypeProps(outType, outCharset, outIsBigEndian); 728 729 // open (TYPE_COUNT-2) swappers 730 // one is a no-op for local type==outType 731 // one type (TYPE_LE) is bogus 732 errorCode=U_ZERO_ERROR; 733 i=makeTypeEnum(outType); 734 ds[TYPE_B]= i==TYPE_B ? NULL : udata_openSwapper(TRUE, U_ASCII_FAMILY, outIsBigEndian, outCharset, &errorCode); 735 ds[TYPE_L]= i==TYPE_L ? NULL : udata_openSwapper(FALSE, U_ASCII_FAMILY, outIsBigEndian, outCharset, &errorCode); 736 ds[TYPE_LE]=NULL; 737 ds[TYPE_E]= i==TYPE_E ? NULL : udata_openSwapper(TRUE, U_EBCDIC_FAMILY, outIsBigEndian, outCharset, &errorCode); 738 if(U_FAILURE(errorCode)) { 739 fprintf(stderr, "icupkg: udata_openSwapper() failed - %s\n", u_errorName(errorCode)); 740 exit(errorCode); 741 } 742 for(i=0; i<TYPE_COUNT; ++i) { 743 if(ds[i]!=NULL) { 744 ds[i]->printError=printPackageError; 745 ds[i]->printErrorContext=stderr; 746 } 747 } 748 749 dsLocalToOut=ds[makeTypeEnum(U_CHARSET_FAMILY, U_IS_BIG_ENDIAN)]; 750 751 // create the file and write its contents 752 file=fopen(filename, "wb"); 753 if(file==NULL) { 754 fprintf(stderr, "icupkg: unable to create file \"%s\"\n", filename); 755 exit(U_FILE_ACCESS_ERROR); 756 } 757 758 // swap and write the header 759 if(dsLocalToOut!=NULL) { 760 udata_swapDataHeader(dsLocalToOut, header, headerLength, header, &errorCode); 761 if(U_FAILURE(errorCode)) { 762 fprintf(stderr, "icupkg: udata_swapDataHeader(local to out) failed - %s\n", u_errorName(errorCode)); 763 exit(errorCode); 764 } 765 } 766 length=(int32_t)fwrite(header, 1, headerLength, file); 767 if(length!=headerLength) { 768 fprintf(stderr, "icupkg: unable to write complete header to file \"%s\"\n", filename); 769 exit(U_FILE_ACCESS_ERROR); 770 } 771 772 // prepare and swap the package name with a tree separator 773 // for prepending to item names 774 if(pkgPrefix[0]==0) { 775 prefixLength=(int32_t)strlen(prefix); 776 } else { 777 prefixLength=(int32_t)strlen(pkgPrefix); 778 memcpy(prefix, pkgPrefix, prefixLength); 779 if(prefixEndsWithType) { 780 prefix[prefixLength-1]=outType; 781 } 782 } 783 prefix[prefixLength++]=U_TREE_ENTRY_SEP_CHAR; 784 prefix[prefixLength]=0; 785 if(dsLocalToOut!=NULL) { 786 dsLocalToOut->swapInvChars(dsLocalToOut, prefix, prefixLength, prefix, &errorCode); 787 if(U_FAILURE(errorCode)) { 788 fprintf(stderr, "icupkg: swapInvChars(output package name) failed - %s\n", u_errorName(errorCode)); 789 exit(errorCode); 790 } 791 792 // swap and sort the item names (sorting needs to be done in the output charset) 793 dsLocalToOut->swapInvChars(dsLocalToOut, inStrings, inStringTop, inStrings, &errorCode); 794 if(U_FAILURE(errorCode)) { 795 fprintf(stderr, "icupkg: swapInvChars(item names) failed - %s\n", u_errorName(errorCode)); 796 exit(errorCode); 797 } 798 sortItems(); 799 } 800 801 // create the output item names in sorted order, with the package name prepended to each 802 for(i=0; i<itemCount; ++i) { 803 length=(int32_t)strlen(items[i].name); 804 name=allocString(FALSE, length+prefixLength); 805 memcpy(name, prefix, prefixLength); 806 memcpy(name+prefixLength, items[i].name, length+1); 807 items[i].name=name; 808 } 809 810 // calculate offsets for item names and items, pad to 16-align items 811 // align only the first item; each item's length is a multiple of 16 812 basenameOffset=4+8*itemCount; 813 offset=basenameOffset+outStringTop; 814 if((length=(offset&15))!=0) { 815 length=16-length; 816 memset(allocString(FALSE, length-1), 0xaa, length); 817 offset+=length; 818 } 819 820 // write the table of contents 821 // first the itemCount 822 outInt32=itemCount; 823 if(dsLocalToOut!=NULL) { 824 dsLocalToOut->swapArray32(dsLocalToOut, &outInt32, 4, &outInt32, &errorCode); 825 if(U_FAILURE(errorCode)) { 826 fprintf(stderr, "icupkg: swapArray32(item count) failed - %s\n", u_errorName(errorCode)); 827 exit(errorCode); 828 } 829 } 830 length=(int32_t)fwrite(&outInt32, 1, 4, file); 831 if(length!=4) { 832 fprintf(stderr, "icupkg: unable to write complete item count to file \"%s\"\n", filename); 833 exit(U_FILE_ACCESS_ERROR); 834 } 835 836 // then write the item entries (and collect the maxItemLength) 837 maxItemLength=0; 838 for(i=0; i<itemCount; ++i) { 839 entry.nameOffset=(uint32_t)(basenameOffset+(items[i].name-outStrings)); 840 entry.dataOffset=(uint32_t)offset; 841 if(dsLocalToOut!=NULL) { 842 dsLocalToOut->swapArray32(dsLocalToOut, &entry, 8, &entry, &errorCode); 843 if(U_FAILURE(errorCode)) { 844 fprintf(stderr, "icupkg: swapArray32(item entry %ld) failed - %s\n", (long)i, u_errorName(errorCode)); 845 exit(errorCode); 846 } 847 } 848 length=(int32_t)fwrite(&entry, 1, 8, file); 849 if(length!=8) { 850 fprintf(stderr, "icupkg: unable to write complete item entry %ld to file \"%s\"\n", (long)i, filename); 851 exit(U_FILE_ACCESS_ERROR); 852 } 853 854 length=items[i].length; 855 if(length>maxItemLength) { 856 maxItemLength=length; 857 } 858 offset+=length; 859 } 860 861 // write the item names 862 length=(int32_t)fwrite(outStrings, 1, outStringTop, file); 863 if(length!=outStringTop) { 864 fprintf(stderr, "icupkg: unable to write complete item names to file \"%s\"\n", filename); 865 exit(U_FILE_ACCESS_ERROR); 866 } 867 868 // write the items 869 for(pItem=items, i=0; i<itemCount; ++pItem, ++i) { 870 int32_t type=makeTypeEnum(pItem->type); 871 if(ds[type]!=NULL) { 872 // swap each item from its platform properties to the desired ones 873 udata_swap( 874 ds[type], 875 pItem->data, pItem->length, pItem->data, 876 &errorCode); 877 if(U_FAILURE(errorCode)) { 878 fprintf(stderr, "icupkg: udata_swap(item %ld) failed - %s\n", (long)i, u_errorName(errorCode)); 879 exit(errorCode); 880 } 881 } 882 length=(int32_t)fwrite(pItem->data, 1, pItem->length, file); 883 if(length!=pItem->length) { 884 fprintf(stderr, "icupkg: unable to write complete item %ld to file \"%s\"\n", (long)i, filename); 885 exit(U_FILE_ACCESS_ERROR); 886 } 887 } 888 889 if(ferror(file)) { 890 fprintf(stderr, "icupkg: unable to write complete file \"%s\"\n", filename); 891 exit(U_FILE_ACCESS_ERROR); 892 } 893 894 fclose(file); 895 for(i=0; i<TYPE_COUNT; ++i) { 896 udata_closeSwapper(ds[i]); 897 } 898} 899 900int32_t 901Package::findItem(const char *name, int32_t length) const { 902 int32_t i, start, limit; 903 int result; 904 905 /* do a binary search for the string */ 906 start=0; 907 limit=itemCount; 908 while(start<limit) { 909 i=(start+limit)/2; 910 if(length>=0) { 911 result=strncmp(name, items[i].name, length); 912 } else { 913 result=strcmp(name, items[i].name); 914 } 915 916 if(result==0) { 917 /* found */ 918 if(length>=0) { 919 /* 920 * if we compared just prefixes, then we may need to back up 921 * to the first item with this prefix 922 */ 923 while(i>0 && 0==strncmp(name, items[i-1].name, length)) { 924 --i; 925 } 926 } 927 return i; 928 } else if(result<0) { 929 limit=i; 930 } else /* result>0 */ { 931 start=i+1; 932 } 933 } 934 935 return ~start; /* not found, return binary-not of the insertion point */ 936} 937 938void 939Package::findItems(const char *pattern) { 940 const char *wild; 941 942 if(pattern==NULL || *pattern==0) { 943 findNextIndex=-1; 944 return; 945 } 946 947 findPrefix=pattern; 948 findSuffix=NULL; 949 findSuffixLength=0; 950 951 wild=strchr(pattern, '*'); 952 if(wild==NULL) { 953 // no wildcard 954 findPrefixLength=(int32_t)strlen(pattern); 955 } else { 956 // one wildcard 957 findPrefixLength=(int32_t)(wild-pattern); 958 findSuffix=wild+1; 959 findSuffixLength=(int32_t)strlen(findSuffix); 960 if(NULL!=strchr(findSuffix, '*')) { 961 // two or more wildcards 962 fprintf(stderr, "icupkg: syntax error (more than one '*') in item pattern \"%s\"\n", pattern); 963 exit(U_PARSE_ERROR); 964 } 965 } 966 967 if(findPrefixLength==0) { 968 findNextIndex=0; 969 } else { 970 findNextIndex=findItem(findPrefix, findPrefixLength); 971 } 972} 973 974int32_t 975Package::findNextItem() { 976 const char *name, *middle, *treeSep; 977 int32_t idx, nameLength, middleLength; 978 979 if(findNextIndex<0) { 980 return -1; 981 } 982 983 while(findNextIndex<itemCount) { 984 idx=findNextIndex++; 985 name=items[idx].name; 986 nameLength=(int32_t)strlen(name); 987 if(nameLength<(findPrefixLength+findSuffixLength)) { 988 // item name too short for prefix & suffix 989 continue; 990 } 991 if(findPrefixLength>0 && 0!=memcmp(findPrefix, name, findPrefixLength)) { 992 // left the range of names with this prefix 993 break; 994 } 995 middle=name+findPrefixLength; 996 middleLength=nameLength-findPrefixLength-findSuffixLength; 997 if(findSuffixLength>0 && 0!=memcmp(findSuffix, name+(nameLength-findSuffixLength), findSuffixLength)) { 998 // suffix does not match 999 continue; 1000 } 1001 // prefix & suffix match 1002 1003 if(matchMode&MATCH_NOSLASH) { 1004 treeSep=strchr(middle, U_TREE_ENTRY_SEP_CHAR); 1005 if(treeSep!=NULL && (treeSep-middle)<middleLength) { 1006 // the middle (matching the * wildcard) contains a tree separator / 1007 continue; 1008 } 1009 } 1010 1011 // found a matching item 1012 return idx; 1013 } 1014 1015 // no more items 1016 findNextIndex=-1; 1017 return -1; 1018} 1019 1020void 1021Package::setMatchMode(uint32_t mode) { 1022 matchMode=mode; 1023} 1024 1025void 1026Package::addItem(const char *name) { 1027 addItem(name, NULL, 0, FALSE, U_ICUDATA_TYPE_LETTER[0]); 1028} 1029 1030void 1031Package::addItem(const char *name, uint8_t *data, int32_t length, UBool isDataOwned, char type) { 1032 int32_t idx; 1033 1034 idx=findItem(name); 1035 if(idx<0) { 1036 // new item, make space at the insertion point 1037 ensureItemCapacity(); 1038 // move the following items down 1039 idx=~idx; 1040 if(idx<itemCount) { 1041 memmove(items+idx+1, items+idx, (itemCount-idx)*sizeof(Item)); 1042 } 1043 ++itemCount; 1044 1045 // reset this Item entry 1046 memset(items+idx, 0, sizeof(Item)); 1047 1048 // copy the item's name 1049 items[idx].name=allocString(TRUE, strlen(name)); 1050 strcpy(items[idx].name, name); 1051 pathToTree(items[idx].name); 1052 } else { 1053 // same-name item found, replace it 1054 if(items[idx].isDataOwned) { 1055 free(items[idx].data); 1056 } 1057 1058 // keep the item's name since it is the same 1059 } 1060 1061 // set the item's data 1062 items[idx].data=data; 1063 items[idx].length=length; 1064 items[idx].isDataOwned=isDataOwned; 1065 items[idx].type=type; 1066} 1067 1068void 1069Package::addFile(const char *filesPath, const char *name) { 1070 uint8_t *data; 1071 int32_t length; 1072 char type; 1073 1074 data=readFile(filesPath, name, length, type); 1075 // readFile() exits the tool if it fails 1076 addItem(name, data, length, TRUE, type); 1077} 1078 1079void 1080Package::addItems(const Package &listPkg) { 1081 const Item *pItem; 1082 int32_t i; 1083 1084 for(pItem=listPkg.items, i=0; i<listPkg.itemCount; ++pItem, ++i) { 1085 addItem(pItem->name, pItem->data, pItem->length, FALSE, pItem->type); 1086 } 1087} 1088 1089void 1090Package::removeItem(int32_t idx) { 1091 if(idx>=0) { 1092 // remove the item 1093 if(items[idx].isDataOwned) { 1094 free(items[idx].data); 1095 } 1096 1097 // move the following items up 1098 if((idx+1)<itemCount) { 1099 memmove(items+idx, items+idx+1, (itemCount-(idx+1))*sizeof(Item)); 1100 } 1101 --itemCount; 1102 1103 if(idx<=findNextIndex) { 1104 --findNextIndex; 1105 } 1106 } 1107} 1108 1109void 1110Package::removeItems(const char *pattern) { 1111 int32_t idx; 1112 1113 findItems(pattern); 1114 while((idx=findNextItem())>=0) { 1115 removeItem(idx); 1116 } 1117} 1118 1119void 1120Package::removeItems(const Package &listPkg) { 1121 const Item *pItem; 1122 int32_t i; 1123 1124 for(pItem=listPkg.items, i=0; i<listPkg.itemCount; ++pItem, ++i) { 1125 removeItems(pItem->name); 1126 } 1127} 1128 1129void 1130Package::extractItem(const char *filesPath, const char *outName, int32_t idx, char outType) { 1131 char filename[1024]; 1132 UDataSwapper *ds; 1133 FILE *file; 1134 Item *pItem; 1135 int32_t fileLength; 1136 uint8_t itemCharset, outCharset; 1137 UBool itemIsBigEndian, outIsBigEndian; 1138 1139 if(idx<0 || itemCount<=idx) { 1140 return; 1141 } 1142 pItem=items+idx; 1143 1144 // swap the data to the outType 1145 // outType==0: don't swap 1146 if(outType!=0 && pItem->type!=outType) { 1147 // open the swapper 1148 UErrorCode errorCode=U_ZERO_ERROR; 1149 makeTypeProps(pItem->type, itemCharset, itemIsBigEndian); 1150 makeTypeProps(outType, outCharset, outIsBigEndian); 1151 ds=udata_openSwapper(itemIsBigEndian, itemCharset, outIsBigEndian, outCharset, &errorCode); 1152 if(U_FAILURE(errorCode)) { 1153 fprintf(stderr, "icupkg: udata_openSwapper(item %ld) failed - %s\n", 1154 (long)idx, u_errorName(errorCode)); 1155 exit(errorCode); 1156 } 1157 1158 ds->printError=printPackageError; 1159 ds->printErrorContext=stderr; 1160 1161 // swap the item from its platform properties to the desired ones 1162 udata_swap(ds, pItem->data, pItem->length, pItem->data, &errorCode); 1163 if(U_FAILURE(errorCode)) { 1164 fprintf(stderr, "icupkg: udata_swap(item %ld) failed - %s\n", (long)idx, u_errorName(errorCode)); 1165 exit(errorCode); 1166 } 1167 udata_closeSwapper(ds); 1168 pItem->type=outType; 1169 } 1170 1171 // create the file and write its contents 1172 makeFullFilenameAndDirs(filesPath, outName, filename, (int32_t)sizeof(filename)); 1173 file=fopen(filename, "wb"); 1174 if(file==NULL) { 1175 fprintf(stderr, "icupkg: unable to create file \"%s\"\n", filename); 1176 exit(U_FILE_ACCESS_ERROR); 1177 } 1178 fileLength=(int32_t)fwrite(pItem->data, 1, pItem->length, file); 1179 1180 if(ferror(file) || fileLength!=pItem->length) { 1181 fprintf(stderr, "icupkg: unable to write complete file \"%s\"\n", filename); 1182 exit(U_FILE_ACCESS_ERROR); 1183 } 1184 fclose(file); 1185} 1186 1187void 1188Package::extractItem(const char *filesPath, int32_t idx, char outType) { 1189 extractItem(filesPath, items[idx].name, idx, outType); 1190} 1191 1192void 1193Package::extractItems(const char *filesPath, const char *pattern, char outType) { 1194 int32_t idx; 1195 1196 findItems(pattern); 1197 while((idx=findNextItem())>=0) { 1198 extractItem(filesPath, idx, outType); 1199 } 1200} 1201 1202void 1203Package::extractItems(const char *filesPath, const Package &listPkg, char outType) { 1204 const Item *pItem; 1205 int32_t i; 1206 1207 for(pItem=listPkg.items, i=0; i<listPkg.itemCount; ++pItem, ++i) { 1208 extractItems(filesPath, pItem->name, outType); 1209 } 1210} 1211 1212int32_t 1213Package::getItemCount() const { 1214 return itemCount; 1215} 1216 1217const Item * 1218Package::getItem(int32_t idx) const { 1219 if (0 <= idx && idx < itemCount) { 1220 return &items[idx]; 1221 } 1222 return NULL; 1223} 1224 1225void 1226Package::checkDependency(void *context, const char *itemName, const char *targetName) { 1227 // check dependency: make sure the target item is in the package 1228 Package *me=(Package *)context; 1229 if(me->findItem(targetName)<0) { 1230 me->isMissingItems=TRUE; 1231 fprintf(stderr, "Item %s depends on missing item %s\n", itemName, targetName); 1232 } 1233} 1234 1235UBool 1236Package::checkDependencies() { 1237 isMissingItems=FALSE; 1238 enumDependencies(this, checkDependency); 1239 return (UBool)!isMissingItems; 1240} 1241 1242void 1243Package::enumDependencies(void *context, CheckDependency check) { 1244 int32_t i; 1245 1246 for(i=0; i<itemCount; ++i) { 1247 enumDependencies(items+i, context, check); 1248 } 1249} 1250 1251char * 1252Package::allocString(UBool in, int32_t length) { 1253 char *p; 1254 int32_t top; 1255 1256 if(in) { 1257 top=inStringTop; 1258 p=inStrings+top; 1259 } else { 1260 top=outStringTop; 1261 p=outStrings+top; 1262 } 1263 top+=length+1; 1264 1265 if(top>STRING_STORE_SIZE) { 1266 fprintf(stderr, "icupkg: string storage overflow\n"); 1267 exit(U_BUFFER_OVERFLOW_ERROR); 1268 } 1269 if(in) { 1270 inStringTop=top; 1271 } else { 1272 outStringTop=top; 1273 } 1274 return p; 1275} 1276 1277void 1278Package::sortItems() { 1279 UErrorCode errorCode=U_ZERO_ERROR; 1280 uprv_sortArray(items, itemCount, (int32_t)sizeof(Item), compareItems, NULL, FALSE, &errorCode); 1281 if(U_FAILURE(errorCode)) { 1282 fprintf(stderr, "icupkg: sorting item names failed - %s\n", u_errorName(errorCode)); 1283 exit(errorCode); 1284 } 1285} 1286 1287void Package::setItemCapacity(int32_t max) 1288{ 1289 if(max<=itemMax) { 1290 return; 1291 } 1292 Item *newItems = (Item*)uprv_malloc(max * sizeof(items[0])); 1293 Item *oldItems = items; 1294 if(newItems == NULL) { 1295 fprintf(stderr, "icupkg: Out of memory trying to allocate %lu bytes for %d items\n", 1296 (unsigned long)max*sizeof(items[0]), max); 1297 exit(U_MEMORY_ALLOCATION_ERROR); 1298 } 1299 if(items && itemCount>0) { 1300 uprv_memcpy(newItems, items, itemCount*sizeof(items[0])); 1301 } 1302 itemMax = max; 1303 items = newItems; 1304 uprv_free(oldItems); 1305} 1306 1307void Package::ensureItemCapacity() 1308{ 1309 if((itemCount+1)>itemMax) { 1310 setItemCapacity(itemCount+kItemsChunk); 1311 } 1312} 1313 1314U_NAMESPACE_END 1315