1/* 2******************************************************************************* 3* 4* Copyright (C) 1999-2012, International Business Machines 5* Corporation and others. All Rights Reserved. 6* 7******************************************************************************* 8* file name: package.cpp 9* encoding: US-ASCII 10* tab size: 8 (not used) 11* indentation:4 12* 13* created on: 2005aug25 14* created by: Markus W. Scherer 15* 16* Read, modify, and write ICU .dat data package files. 17* This is an integral part of the icupkg tool, moved to the toolutil library 18* because parts of tool implementations tend to be later shared by 19* other tools. 20* Subsumes functionality and implementation code from 21* gencmn, decmn, and icuswap tools. 22*/ 23 24#include "unicode/utypes.h" 25#include "unicode/putil.h" 26#include "unicode/udata.h" 27#include "cstring.h" 28#include "uarrsort.h" 29#include "ucmndata.h" 30#include "udataswp.h" 31#include "swapimpl.h" 32#include "toolutil.h" 33#include "package.h" 34#include "cmemory.h" 35 36#include <stdio.h> 37#include <stdlib.h> 38#include <string.h> 39 40 41static const int32_t kItemsChunk = 256; /* How much to increase the filesarray by each time */ 42 43// general definitions ----------------------------------------------------- *** 44 45#define LENGTHOF(array) (int32_t)(sizeof(array)/sizeof((array)[0])) 46 47/* UDataInfo cf. udata.h */ 48static const UDataInfo dataInfo={ 49 (uint16_t)sizeof(UDataInfo), 50 0, 51 52 U_IS_BIG_ENDIAN, 53 U_CHARSET_FAMILY, 54 (uint8_t)sizeof(UChar), 55 0, 56 57 {0x43, 0x6d, 0x6e, 0x44}, /* dataFormat="CmnD" */ 58 {1, 0, 0, 0}, /* formatVersion */ 59 {3, 0, 0, 0} /* dataVersion */ 60}; 61 62U_CDECL_BEGIN 63static void U_CALLCONV 64printPackageError(void *context, const char *fmt, va_list args) { 65 vfprintf((FILE *)context, fmt, args); 66} 67U_CDECL_END 68 69static uint16_t 70readSwapUInt16(uint16_t x) { 71 return (uint16_t)((x<<8)|(x>>8)); 72} 73 74// platform types ---------------------------------------------------------- *** 75 76static const char *types="lb?e"; 77 78enum { TYPE_L, TYPE_B, TYPE_LE, TYPE_E, TYPE_COUNT }; 79 80static inline int32_t 81makeTypeEnum(uint8_t charset, UBool isBigEndian) { 82 return 2*(int32_t)charset+isBigEndian; 83} 84 85static inline int32_t 86makeTypeEnum(char type) { 87 return 88 type == 'l' ? TYPE_L : 89 type == 'b' ? TYPE_B : 90 type == 'e' ? TYPE_E : 91 -1; 92} 93 94static inline char 95makeTypeLetter(uint8_t charset, UBool isBigEndian) { 96 return types[makeTypeEnum(charset, isBigEndian)]; 97} 98 99static inline char 100makeTypeLetter(int32_t typeEnum) { 101 return types[typeEnum]; 102} 103 104static void 105makeTypeProps(char type, uint8_t &charset, UBool &isBigEndian) { 106 int32_t typeEnum=makeTypeEnum(type); 107 charset=(uint8_t)(typeEnum>>1); 108 isBigEndian=(UBool)(typeEnum&1); 109} 110 111U_CFUNC const UDataInfo * 112getDataInfo(const uint8_t *data, int32_t length, 113 int32_t &infoLength, int32_t &headerLength, 114 UErrorCode *pErrorCode) { 115 const DataHeader *pHeader; 116 const UDataInfo *pInfo; 117 118 if(pErrorCode==NULL || U_FAILURE(*pErrorCode)) { 119 return NULL; 120 } 121 if( data==NULL || 122 (length>=0 && length<(int32_t)sizeof(DataHeader)) 123 ) { 124 *pErrorCode=U_ILLEGAL_ARGUMENT_ERROR; 125 return NULL; 126 } 127 128 pHeader=(const DataHeader *)data; 129 pInfo=&pHeader->info; 130 if( (length>=0 && length<(int32_t)sizeof(DataHeader)) || 131 pHeader->dataHeader.magic1!=0xda || 132 pHeader->dataHeader.magic2!=0x27 || 133 pInfo->sizeofUChar!=2 134 ) { 135 *pErrorCode=U_UNSUPPORTED_ERROR; 136 return NULL; 137 } 138 139 if(pInfo->isBigEndian==U_IS_BIG_ENDIAN) { 140 headerLength=pHeader->dataHeader.headerSize; 141 infoLength=pInfo->size; 142 } else { 143 headerLength=readSwapUInt16(pHeader->dataHeader.headerSize); 144 infoLength=readSwapUInt16(pInfo->size); 145 } 146 147 if( headerLength<(int32_t)sizeof(DataHeader) || 148 infoLength<(int32_t)sizeof(UDataInfo) || 149 headerLength<(int32_t)(sizeof(pHeader->dataHeader)+infoLength) || 150 (length>=0 && length<headerLength) 151 ) { 152 *pErrorCode=U_UNSUPPORTED_ERROR; 153 return NULL; 154 } 155 156 return pInfo; 157} 158 159static int32_t 160getTypeEnumForInputData(const uint8_t *data, int32_t length, 161 UErrorCode *pErrorCode) { 162 const UDataInfo *pInfo; 163 int32_t infoLength, headerLength; 164 165 /* getDataInfo() checks for illegal arguments */ 166 pInfo=getDataInfo(data, length, infoLength, headerLength, pErrorCode); 167 if(pInfo==NULL) { 168 return -1; 169 } 170 171 return makeTypeEnum(pInfo->charsetFamily, (UBool)pInfo->isBigEndian); 172} 173 174// file handling ----------------------------------------------------------- *** 175 176static void 177extractPackageName(const char *filename, 178 char pkg[], int32_t capacity) { 179 const char *basename; 180 int32_t len; 181 182 basename=findBasename(filename); 183 len=(int32_t)strlen(basename)-4; /* -4: subtract the length of ".dat" */ 184 185 if(len<=0 || 0!=strcmp(basename+len, ".dat")) { 186 fprintf(stderr, "icupkg: \"%s\" is not recognized as a package filename (must end with .dat)\n", 187 basename); 188 exit(U_ILLEGAL_ARGUMENT_ERROR); 189 } 190 191 if(len>=capacity) { 192 fprintf(stderr, "icupkg: the package name \"%s\" is too long (>=%ld)\n", 193 basename, (long)capacity); 194 exit(U_ILLEGAL_ARGUMENT_ERROR); 195 } 196 197 memcpy(pkg, basename, len); 198 pkg[len]=0; 199} 200 201static int32_t 202getFileLength(FILE *f) { 203 int32_t length; 204 205 fseek(f, 0, SEEK_END); 206 length=(int32_t)ftell(f); 207 fseek(f, 0, SEEK_SET); 208 return length; 209} 210 211/* 212 * Turn tree separators and alternate file separators into normal file separators. 213 */ 214#if U_TREE_ENTRY_SEP_CHAR==U_FILE_SEP_CHAR && U_FILE_ALT_SEP_CHAR==U_FILE_SEP_CHAR 215#define treeToPath(s) 216#else 217static void 218treeToPath(char *s) { 219 char *t; 220 221 for(t=s; *t!=0; ++t) { 222 if(*t==U_TREE_ENTRY_SEP_CHAR || *t==U_FILE_ALT_SEP_CHAR) { 223 *t=U_FILE_SEP_CHAR; 224 } 225 } 226} 227#endif 228 229/* 230 * Turn file separators into tree separators. 231 */ 232#if U_TREE_ENTRY_SEP_CHAR==U_FILE_SEP_CHAR && U_FILE_ALT_SEP_CHAR==U_FILE_SEP_CHAR 233#define pathToTree(s) 234#else 235static void 236pathToTree(char *s) { 237 char *t; 238 239 for(t=s; *t!=0; ++t) { 240 if(*t==U_FILE_SEP_CHAR || *t==U_FILE_ALT_SEP_CHAR) { 241 *t=U_TREE_ENTRY_SEP_CHAR; 242 } 243 } 244} 245#endif 246 247/* 248 * Prepend the path (if any) to the name and run the name through treeToName(). 249 */ 250static void 251makeFullFilename(const char *path, const char *name, 252 char *filename, int32_t capacity) { 253 char *s; 254 255 // prepend the path unless NULL or empty 256 if(path!=NULL && path[0]!=0) { 257 if((int32_t)(strlen(path)+1)>=capacity) { 258 fprintf(stderr, "pathname too long: \"%s\"\n", path); 259 exit(U_BUFFER_OVERFLOW_ERROR); 260 } 261 strcpy(filename, path); 262 263 // make sure the path ends with a file separator 264 s=strchr(filename, 0); 265 if(*(s-1)!=U_FILE_SEP_CHAR && *(s-1)!=U_FILE_ALT_SEP_CHAR) { 266 *s++=U_FILE_SEP_CHAR; 267 } 268 } else { 269 s=filename; 270 } 271 272 // turn the name into a filename, turn tree separators into file separators 273 if((int32_t)((s-filename)+strlen(name))>=capacity) { 274 fprintf(stderr, "path/filename too long: \"%s%s\"\n", filename, name); 275 exit(U_BUFFER_OVERFLOW_ERROR); 276 } 277 strcpy(s, name); 278 treeToPath(s); 279} 280 281static void 282makeFullFilenameAndDirs(const char *path, const char *name, 283 char *filename, int32_t capacity) { 284 char *sep; 285 UErrorCode errorCode; 286 287 makeFullFilename(path, name, filename, capacity); 288 289 // make tree directories 290 errorCode=U_ZERO_ERROR; 291 sep=strchr(filename, 0)-strlen(name); 292 while((sep=strchr(sep, U_FILE_SEP_CHAR))!=NULL) { 293 if(sep!=filename) { 294 *sep=0; // truncate temporarily 295 uprv_mkdir(filename, &errorCode); 296 if(U_FAILURE(errorCode)) { 297 fprintf(stderr, "icupkg: unable to create tree directory \"%s\"\n", filename); 298 exit(U_FILE_ACCESS_ERROR); 299 } 300 } 301 *sep++=U_FILE_SEP_CHAR; // restore file separator character 302 } 303} 304 305static uint8_t * 306readFile(const char *path, const char *name, int32_t &length, char &type) { 307 char filename[1024]; 308 FILE *file; 309 uint8_t *data; 310 UErrorCode errorCode; 311 int32_t fileLength, typeEnum; 312 313 makeFullFilename(path, name, filename, (int32_t)sizeof(filename)); 314 315 /* open the input file, get its length, allocate memory for it, read the file */ 316 file=fopen(filename, "rb"); 317 if(file==NULL) { 318 fprintf(stderr, "icupkg: unable to open input file \"%s\"\n", filename); 319 exit(U_FILE_ACCESS_ERROR); 320 } 321 322 /* get the file length */ 323 fileLength=getFileLength(file); 324 if(ferror(file) || fileLength<=0) { 325 fprintf(stderr, "icupkg: empty input file \"%s\"\n", filename); 326 fclose(file); 327 exit(U_FILE_ACCESS_ERROR); 328 } 329 330 /* allocate the buffer, pad to multiple of 16 */ 331 length=(fileLength+0xf)&~0xf; 332 data=(uint8_t *)uprv_malloc(length); 333 if(data==NULL) { 334 fclose(file); 335 fprintf(stderr, "icupkg: malloc error allocating %d bytes.\n", (int)length); 336 exit(U_MEMORY_ALLOCATION_ERROR); 337 } 338 339 /* read the file */ 340 if(fileLength!=(int32_t)fread(data, 1, fileLength, file)) { 341 fprintf(stderr, "icupkg: error reading \"%s\"\n", filename); 342 fclose(file); 343 free(data); 344 exit(U_FILE_ACCESS_ERROR); 345 } 346 347 /* pad the file to a multiple of 16 using the usual padding byte */ 348 if(fileLength<length) { 349 memset(data+fileLength, 0xaa, length-fileLength); 350 } 351 352 fclose(file); 353 354 // minimum check for ICU-format data 355 errorCode=U_ZERO_ERROR; 356 typeEnum=getTypeEnumForInputData(data, length, &errorCode); 357 if(typeEnum<0 || U_FAILURE(errorCode)) { 358 fprintf(stderr, "icupkg: not an ICU data file: \"%s\"\n", filename); 359 free(data); 360#if !UCONFIG_NO_LEGACY_CONVERSION 361 exit(U_INVALID_FORMAT_ERROR); 362#else 363 fprintf(stderr, "U_INVALID_FORMAT_ERROR occurred but UCONFIG_NO_LEGACY_CONVERSION is on so this is expected.\n"); 364 exit(0); 365#endif 366 } 367 type=makeTypeLetter(typeEnum); 368 369 return data; 370} 371 372// .dat package file representation ---------------------------------------- *** 373 374U_CDECL_BEGIN 375 376static int32_t U_CALLCONV 377compareItems(const void * /*context*/, const void *left, const void *right) { 378 U_NAMESPACE_USE 379 380 return (int32_t)strcmp(((Item *)left)->name, ((Item *)right)->name); 381} 382 383U_CDECL_END 384 385U_NAMESPACE_BEGIN 386 387Package::Package() { 388 inPkgName[0]=0; 389 inData=NULL; 390 inLength=0; 391 inCharset=U_CHARSET_FAMILY; 392 inIsBigEndian=U_IS_BIG_ENDIAN; 393 394 itemCount=0; 395 itemMax=0; 396 items=NULL; 397 398 inStringTop=outStringTop=0; 399 400 matchMode=0; 401 findPrefix=findSuffix=NULL; 402 findPrefixLength=findSuffixLength=0; 403 findNextIndex=-1; 404 405 // create a header for an empty package 406 DataHeader *pHeader; 407 pHeader=(DataHeader *)header; 408 pHeader->dataHeader.magic1=0xda; 409 pHeader->dataHeader.magic2=0x27; 410 memcpy(&pHeader->info, &dataInfo, sizeof(dataInfo)); 411 headerLength=(int32_t)(4+sizeof(dataInfo)); 412 if(headerLength&0xf) { 413 /* NUL-pad the header to a multiple of 16 */ 414 int32_t length=(headerLength+0xf)&~0xf; 415 memset(header+headerLength, 0, length-headerLength); 416 headerLength=length; 417 } 418 pHeader->dataHeader.headerSize=(uint16_t)headerLength; 419} 420 421Package::~Package() { 422 int32_t idx; 423 424 free(inData); 425 426 for(idx=0; idx<itemCount; ++idx) { 427 if(items[idx].isDataOwned) { 428 free(items[idx].data); 429 } 430 } 431 432 uprv_free((void*)items); 433} 434 435void 436Package::readPackage(const char *filename) { 437 UDataSwapper *ds; 438 const UDataInfo *pInfo; 439 UErrorCode errorCode; 440 441 const uint8_t *inBytes; 442 443 int32_t length, offset, i; 444 int32_t itemLength, typeEnum; 445 char type; 446 447 const UDataOffsetTOCEntry *inEntries; 448 449 extractPackageName(filename, inPkgName, (int32_t)sizeof(inPkgName)); 450 451 /* read the file */ 452 inData=readFile(NULL, filename, inLength, type); 453 length=inLength; 454 455 /* 456 * swap the header - even if the swapping itself is a no-op 457 * because it tells us the header length 458 */ 459 errorCode=U_ZERO_ERROR; 460 makeTypeProps(type, inCharset, inIsBigEndian); 461 ds=udata_openSwapper(inIsBigEndian, inCharset, U_IS_BIG_ENDIAN, U_CHARSET_FAMILY, &errorCode); 462 if(U_FAILURE(errorCode)) { 463 fprintf(stderr, "icupkg: udata_openSwapper(\"%s\") failed - %s\n", 464 filename, u_errorName(errorCode)); 465 exit(errorCode); 466 } 467 468 ds->printError=printPackageError; 469 ds->printErrorContext=stderr; 470 471 headerLength=sizeof(header); 472 if(length<headerLength) { 473 headerLength=length; 474 } 475 headerLength=udata_swapDataHeader(ds, inData, headerLength, header, &errorCode); 476 if(U_FAILURE(errorCode)) { 477 exit(errorCode); 478 } 479 480 /* check data format and format version */ 481 pInfo=(const UDataInfo *)((const char *)inData+4); 482 if(!( 483 pInfo->dataFormat[0]==0x43 && /* dataFormat="CmnD" */ 484 pInfo->dataFormat[1]==0x6d && 485 pInfo->dataFormat[2]==0x6e && 486 pInfo->dataFormat[3]==0x44 && 487 pInfo->formatVersion[0]==1 488 )) { 489 fprintf(stderr, "icupkg: data format %02x.%02x.%02x.%02x (format version %02x) is not recognized as an ICU .dat package\n", 490 pInfo->dataFormat[0], pInfo->dataFormat[1], 491 pInfo->dataFormat[2], pInfo->dataFormat[3], 492 pInfo->formatVersion[0]); 493 exit(U_UNSUPPORTED_ERROR); 494 } 495 inIsBigEndian=(UBool)pInfo->isBigEndian; 496 inCharset=pInfo->charsetFamily; 497 498 inBytes=(const uint8_t *)inData+headerLength; 499 inEntries=(const UDataOffsetTOCEntry *)(inBytes+4); 500 501 /* check that the itemCount fits, then the ToC table, then at least the header of the last item */ 502 length-=headerLength; 503 if(length<4) { 504 /* itemCount does not fit */ 505 offset=0x7fffffff; 506 } else { 507 itemCount=udata_readInt32(ds, *(const int32_t *)inBytes); 508 setItemCapacity(itemCount); /* resize so there's space */ 509 if(itemCount==0) { 510 offset=4; 511 } else if(length<(4+8*itemCount)) { 512 /* ToC table does not fit */ 513 offset=0x7fffffff; 514 } else { 515 /* offset of the last item plus at least 20 bytes for its header */ 516 offset=20+(int32_t)ds->readUInt32(inEntries[itemCount-1].dataOffset); 517 } 518 } 519 if(length<offset) { 520 fprintf(stderr, "icupkg: too few bytes (%ld after header) for a .dat package\n", 521 (long)length); 522 exit(U_INDEX_OUTOFBOUNDS_ERROR); 523 } 524 /* do not modify the package length variable until the last item's length is set */ 525 526 if(itemCount>0) { 527 char prefix[MAX_PKG_NAME_LENGTH+4]; 528 char *s, *inItemStrings; 529 int32_t inPkgNameLength, prefixLength, stringsOffset; 530 531 if(itemCount>itemMax) { 532 fprintf(stderr, "icupkg: too many items, maximum is %d\n", itemMax); 533 exit(U_BUFFER_OVERFLOW_ERROR); 534 } 535 536 /* swap the item name strings */ 537 stringsOffset=4+8*itemCount; 538 itemLength=(int32_t)(ds->readUInt32(inEntries[0].dataOffset))-stringsOffset; 539 540 // don't include padding bytes at the end of the item names 541 while(itemLength>0 && inBytes[stringsOffset+itemLength-1]!=0) { 542 --itemLength; 543 } 544 545 if((inStringTop+itemLength)>STRING_STORE_SIZE) { 546 fprintf(stderr, "icupkg: total length of item name strings too long\n"); 547 exit(U_BUFFER_OVERFLOW_ERROR); 548 } 549 550 inItemStrings=inStrings+inStringTop; 551 ds->swapInvChars(ds, inBytes+stringsOffset, itemLength, inItemStrings, &errorCode); 552 if(U_FAILURE(errorCode)) { 553 fprintf(stderr, "icupkg failed to swap the input .dat package item name strings\n"); 554 exit(U_INVALID_FORMAT_ERROR); 555 } 556 inStringTop+=itemLength; 557 558 // reset the Item entries 559 memset(items, 0, itemCount*sizeof(Item)); 560 561 inPkgNameLength=strlen(inPkgName); 562 memcpy(prefix, inPkgName, inPkgNameLength); 563 prefixLength=inPkgNameLength; 564 565 /* 566 * Get the common prefix of the items. 567 * New-style ICU .dat packages use tree separators ('/') between package names, 568 * tree names, and item names, 569 * while old-style ICU .dat packages (before multi-tree support) 570 * use an underscore ('_') between package and item names. 571 */ 572 offset=(int32_t)ds->readUInt32(inEntries[0].nameOffset)-stringsOffset; 573 s=inItemStrings+offset; 574 if( (int32_t)strlen(s)>=(inPkgNameLength+2) && 575 0==memcmp(s, inPkgName, inPkgNameLength) && 576 s[inPkgNameLength]=='_' 577 ) { 578 // old-style .dat package 579 prefix[prefixLength++]='_'; 580 } else { 581 // new-style .dat package 582 prefix[prefixLength++]=U_TREE_ENTRY_SEP_CHAR; 583 // if it turns out to not contain U_TREE_ENTRY_SEP_CHAR 584 // then the test in the loop below will fail 585 } 586 prefix[prefixLength]=0; 587 588 /* read the ToC table */ 589 for(i=0; i<itemCount; ++i) { 590 // skip the package part of the item name, error if it does not match the actual package name 591 // or if nothing follows the package name 592 offset=(int32_t)ds->readUInt32(inEntries[i].nameOffset)-stringsOffset; 593 s=inItemStrings+offset; 594 if(0!=strncmp(s, prefix, prefixLength) || s[prefixLength]==0) { 595 fprintf(stderr, "icupkg: input .dat item name \"%s\" does not start with \"%s\"\n", 596 s, prefix); 597 exit(U_UNSUPPORTED_ERROR); 598 } 599 items[i].name=s+prefixLength; 600 601 // set the item's data 602 items[i].data=(uint8_t *)inBytes+ds->readUInt32(inEntries[i].dataOffset); 603 if(i>0) { 604 items[i-1].length=(int32_t)(items[i].data-items[i-1].data); 605 606 // set the previous item's platform type 607 typeEnum=getTypeEnumForInputData(items[i-1].data, items[i-1].length, &errorCode); 608 if(typeEnum<0 || U_FAILURE(errorCode)) { 609 fprintf(stderr, "icupkg: not an ICU data file: item \"%s\" in \"%s\"\n", items[i-1].name, filename); 610 exit(U_INVALID_FORMAT_ERROR); 611 } 612 items[i-1].type=makeTypeLetter(typeEnum); 613 } 614 items[i].isDataOwned=FALSE; 615 } 616 // set the last item's length 617 items[itemCount-1].length=length-ds->readUInt32(inEntries[itemCount-1].dataOffset); 618 619 // set the last item's platform type 620 typeEnum=getTypeEnumForInputData(items[itemCount-1].data, items[itemCount-1].length, &errorCode); 621 if(typeEnum<0 || U_FAILURE(errorCode)) { 622 fprintf(stderr, "icupkg: not an ICU data file: item \"%s\" in \"%s\"\n", items[i-1].name, filename); 623 exit(U_INVALID_FORMAT_ERROR); 624 } 625 items[itemCount-1].type=makeTypeLetter(typeEnum); 626 627 if(type!=U_ICUDATA_TYPE_LETTER[0]) { 628 // sort the item names for the local charset 629 sortItems(); 630 } 631 } 632 633 udata_closeSwapper(ds); 634} 635 636char 637Package::getInType() { 638 return makeTypeLetter(inCharset, inIsBigEndian); 639} 640 641void 642Package::writePackage(const char *filename, char outType, const char *comment) { 643 char prefix[MAX_PKG_NAME_LENGTH+4]; 644 UDataOffsetTOCEntry entry; 645 UDataSwapper *dsLocalToOut, *ds[TYPE_COUNT]; 646 FILE *file; 647 Item *pItem; 648 char *name; 649 UErrorCode errorCode; 650 int32_t i, length, prefixLength, maxItemLength, basenameOffset, offset, outInt32; 651 uint8_t outCharset; 652 UBool outIsBigEndian; 653 654 extractPackageName(filename, prefix, MAX_PKG_NAME_LENGTH); 655 656 // if there is an explicit comment, then use it, else use what's in the current header 657 if(comment!=NULL) { 658 /* get the header size minus the current comment */ 659 DataHeader *pHeader; 660 int32_t length; 661 662 pHeader=(DataHeader *)header; 663 headerLength=4+pHeader->info.size; 664 length=(int32_t)strlen(comment); 665 if((int32_t)(headerLength+length)>=(int32_t)sizeof(header)) { 666 fprintf(stderr, "icupkg: comment too long\n"); 667 exit(U_BUFFER_OVERFLOW_ERROR); 668 } 669 memcpy(header+headerLength, comment, length+1); 670 headerLength+=length; 671 if(headerLength&0xf) { 672 /* NUL-pad the header to a multiple of 16 */ 673 length=(headerLength+0xf)&~0xf; 674 memset(header+headerLength, 0, length-headerLength); 675 headerLength=length; 676 } 677 pHeader->dataHeader.headerSize=(uint16_t)headerLength; 678 } 679 680 makeTypeProps(outType, outCharset, outIsBigEndian); 681 682 // open (TYPE_COUNT-2) swappers 683 // one is a no-op for local type==outType 684 // one type (TYPE_LE) is bogus 685 errorCode=U_ZERO_ERROR; 686 i=makeTypeEnum(outType); 687 ds[TYPE_B]= i==TYPE_B ? NULL : udata_openSwapper(TRUE, U_ASCII_FAMILY, outIsBigEndian, outCharset, &errorCode); 688 ds[TYPE_L]= i==TYPE_L ? NULL : udata_openSwapper(FALSE, U_ASCII_FAMILY, outIsBigEndian, outCharset, &errorCode); 689 ds[TYPE_LE]=NULL; 690 ds[TYPE_E]= i==TYPE_E ? NULL : udata_openSwapper(TRUE, U_EBCDIC_FAMILY, outIsBigEndian, outCharset, &errorCode); 691 if(U_FAILURE(errorCode)) { 692 fprintf(stderr, "icupkg: udata_openSwapper() failed - %s\n", u_errorName(errorCode)); 693 exit(errorCode); 694 } 695 for(i=0; i<TYPE_COUNT; ++i) { 696 if(ds[i]!=NULL) { 697 ds[i]->printError=printPackageError; 698 ds[i]->printErrorContext=stderr; 699 } 700 } 701 702 dsLocalToOut=ds[makeTypeEnum(U_CHARSET_FAMILY, U_IS_BIG_ENDIAN)]; 703 704 // create the file and write its contents 705 file=fopen(filename, "wb"); 706 if(file==NULL) { 707 fprintf(stderr, "icupkg: unable to create file \"%s\"\n", filename); 708 exit(U_FILE_ACCESS_ERROR); 709 } 710 711 // swap and write the header 712 if(dsLocalToOut!=NULL) { 713 udata_swapDataHeader(dsLocalToOut, header, headerLength, header, &errorCode); 714 if(U_FAILURE(errorCode)) { 715 fprintf(stderr, "icupkg: udata_swapDataHeader(local to out) failed - %s\n", u_errorName(errorCode)); 716 exit(errorCode); 717 } 718 } 719 length=(int32_t)fwrite(header, 1, headerLength, file); 720 if(length!=headerLength) { 721 fprintf(stderr, "icupkg: unable to write complete header to file \"%s\"\n", filename); 722 exit(U_FILE_ACCESS_ERROR); 723 } 724 725 // prepare and swap the package name with a tree separator 726 // for prepending to item names 727 strcat(prefix, U_TREE_ENTRY_SEP_STRING); 728 prefixLength=(int32_t)strlen(prefix); 729 if(dsLocalToOut!=NULL) { 730 dsLocalToOut->swapInvChars(dsLocalToOut, prefix, prefixLength, prefix, &errorCode); 731 if(U_FAILURE(errorCode)) { 732 fprintf(stderr, "icupkg: swapInvChars(output package name) failed - %s\n", u_errorName(errorCode)); 733 exit(errorCode); 734 } 735 736 // swap and sort the item names (sorting needs to be done in the output charset) 737 dsLocalToOut->swapInvChars(dsLocalToOut, inStrings, inStringTop, inStrings, &errorCode); 738 if(U_FAILURE(errorCode)) { 739 fprintf(stderr, "icupkg: swapInvChars(item names) failed - %s\n", u_errorName(errorCode)); 740 exit(errorCode); 741 } 742 sortItems(); 743 } 744 745 // create the output item names in sorted order, with the package name prepended to each 746 for(i=0; i<itemCount; ++i) { 747 length=(int32_t)strlen(items[i].name); 748 name=allocString(FALSE, length+prefixLength); 749 memcpy(name, prefix, prefixLength); 750 memcpy(name+prefixLength, items[i].name, length+1); 751 items[i].name=name; 752 } 753 754 // calculate offsets for item names and items, pad to 16-align items 755 // align only the first item; each item's length is a multiple of 16 756 basenameOffset=4+8*itemCount; 757 offset=basenameOffset+outStringTop; 758 if((length=(offset&15))!=0) { 759 length=16-length; 760 memset(allocString(FALSE, length-1), 0xaa, length); 761 offset+=length; 762 } 763 764 // write the table of contents 765 // first the itemCount 766 outInt32=itemCount; 767 if(dsLocalToOut!=NULL) { 768 dsLocalToOut->swapArray32(dsLocalToOut, &outInt32, 4, &outInt32, &errorCode); 769 if(U_FAILURE(errorCode)) { 770 fprintf(stderr, "icupkg: swapArray32(item count) failed - %s\n", u_errorName(errorCode)); 771 exit(errorCode); 772 } 773 } 774 length=(int32_t)fwrite(&outInt32, 1, 4, file); 775 if(length!=4) { 776 fprintf(stderr, "icupkg: unable to write complete item count to file \"%s\"\n", filename); 777 exit(U_FILE_ACCESS_ERROR); 778 } 779 780 // then write the item entries (and collect the maxItemLength) 781 maxItemLength=0; 782 for(i=0; i<itemCount; ++i) { 783 entry.nameOffset=(uint32_t)(basenameOffset+(items[i].name-outStrings)); 784 entry.dataOffset=(uint32_t)offset; 785 if(dsLocalToOut!=NULL) { 786 dsLocalToOut->swapArray32(dsLocalToOut, &entry, 8, &entry, &errorCode); 787 if(U_FAILURE(errorCode)) { 788 fprintf(stderr, "icupkg: swapArray32(item entry %ld) failed - %s\n", (long)i, u_errorName(errorCode)); 789 exit(errorCode); 790 } 791 } 792 length=(int32_t)fwrite(&entry, 1, 8, file); 793 if(length!=8) { 794 fprintf(stderr, "icupkg: unable to write complete item entry %ld to file \"%s\"\n", (long)i, filename); 795 exit(U_FILE_ACCESS_ERROR); 796 } 797 798 length=items[i].length; 799 if(length>maxItemLength) { 800 maxItemLength=length; 801 } 802 offset+=length; 803 } 804 805 // write the item names 806 length=(int32_t)fwrite(outStrings, 1, outStringTop, file); 807 if(length!=outStringTop) { 808 fprintf(stderr, "icupkg: unable to write complete item names to file \"%s\"\n", filename); 809 exit(U_FILE_ACCESS_ERROR); 810 } 811 812 // write the items 813 for(pItem=items, i=0; i<itemCount; ++pItem, ++i) { 814 int32_t type=makeTypeEnum(pItem->type); 815 if(ds[type]!=NULL) { 816 // swap each item from its platform properties to the desired ones 817 udata_swap( 818 ds[type], 819 pItem->data, pItem->length, pItem->data, 820 &errorCode); 821 if(U_FAILURE(errorCode)) { 822 fprintf(stderr, "icupkg: udata_swap(item %ld) failed - %s\n", (long)i, u_errorName(errorCode)); 823 exit(errorCode); 824 } 825 } 826 length=(int32_t)fwrite(pItem->data, 1, pItem->length, file); 827 if(length!=pItem->length) { 828 fprintf(stderr, "icupkg: unable to write complete item %ld to file \"%s\"\n", (long)i, filename); 829 exit(U_FILE_ACCESS_ERROR); 830 } 831 } 832 833 if(ferror(file)) { 834 fprintf(stderr, "icupkg: unable to write complete file \"%s\"\n", filename); 835 exit(U_FILE_ACCESS_ERROR); 836 } 837 838 fclose(file); 839 for(i=0; i<TYPE_COUNT; ++i) { 840 udata_closeSwapper(ds[i]); 841 } 842} 843 844int32_t 845Package::findItem(const char *name, int32_t length) const { 846 int32_t i, start, limit; 847 int result; 848 849 /* do a binary search for the string */ 850 start=0; 851 limit=itemCount; 852 while(start<limit) { 853 i=(start+limit)/2; 854 if(length>=0) { 855 result=strncmp(name, items[i].name, length); 856 } else { 857 result=strcmp(name, items[i].name); 858 } 859 860 if(result==0) { 861 /* found */ 862 if(length>=0) { 863 /* 864 * if we compared just prefixes, then we may need to back up 865 * to the first item with this prefix 866 */ 867 while(i>0 && 0==strncmp(name, items[i-1].name, length)) { 868 --i; 869 } 870 } 871 return i; 872 } else if(result<0) { 873 limit=i; 874 } else /* result>0 */ { 875 start=i+1; 876 } 877 } 878 879 return ~start; /* not found, return binary-not of the insertion point */ 880} 881 882void 883Package::findItems(const char *pattern) { 884 const char *wild; 885 886 if(pattern==NULL || *pattern==0) { 887 findNextIndex=-1; 888 return; 889 } 890 891 findPrefix=pattern; 892 findSuffix=NULL; 893 findSuffixLength=0; 894 895 wild=strchr(pattern, '*'); 896 if(wild==NULL) { 897 // no wildcard 898 findPrefixLength=(int32_t)strlen(pattern); 899 } else { 900 // one wildcard 901 findPrefixLength=(int32_t)(wild-pattern); 902 findSuffix=wild+1; 903 findSuffixLength=(int32_t)strlen(findSuffix); 904 if(NULL!=strchr(findSuffix, '*')) { 905 // two or more wildcards 906 fprintf(stderr, "icupkg: syntax error (more than one '*') in item pattern \"%s\"\n", pattern); 907 exit(U_PARSE_ERROR); 908 } 909 } 910 911 if(findPrefixLength==0) { 912 findNextIndex=0; 913 } else { 914 findNextIndex=findItem(findPrefix, findPrefixLength); 915 } 916} 917 918int32_t 919Package::findNextItem() { 920 const char *name, *middle, *treeSep; 921 int32_t idx, nameLength, middleLength; 922 923 if(findNextIndex<0) { 924 return -1; 925 } 926 927 while(findNextIndex<itemCount) { 928 idx=findNextIndex++; 929 name=items[idx].name; 930 nameLength=(int32_t)strlen(name); 931 if(nameLength<(findPrefixLength+findSuffixLength)) { 932 // item name too short for prefix & suffix 933 continue; 934 } 935 if(findPrefixLength>0 && 0!=memcmp(findPrefix, name, findPrefixLength)) { 936 // left the range of names with this prefix 937 break; 938 } 939 middle=name+findPrefixLength; 940 middleLength=nameLength-findPrefixLength-findSuffixLength; 941 if(findSuffixLength>0 && 0!=memcmp(findSuffix, name+(nameLength-findSuffixLength), findSuffixLength)) { 942 // suffix does not match 943 continue; 944 } 945 // prefix & suffix match 946 947 if(matchMode&MATCH_NOSLASH) { 948 treeSep=strchr(middle, U_TREE_ENTRY_SEP_CHAR); 949 if(treeSep!=NULL && (treeSep-middle)<middleLength) { 950 // the middle (matching the * wildcard) contains a tree separator / 951 continue; 952 } 953 } 954 955 // found a matching item 956 return idx; 957 } 958 959 // no more items 960 findNextIndex=-1; 961 return -1; 962} 963 964void 965Package::setMatchMode(uint32_t mode) { 966 matchMode=mode; 967} 968 969void 970Package::addItem(const char *name) { 971 addItem(name, NULL, 0, FALSE, U_ICUDATA_TYPE_LETTER[0]); 972} 973 974void 975Package::addItem(const char *name, uint8_t *data, int32_t length, UBool isDataOwned, char type) { 976 int32_t idx; 977 978 idx=findItem(name); 979 if(idx<0) { 980 // new item, make space at the insertion point 981 ensureItemCapacity(); 982 // move the following items down 983 idx=~idx; 984 if(idx<itemCount) { 985 memmove(items+idx+1, items+idx, (itemCount-idx)*sizeof(Item)); 986 } 987 ++itemCount; 988 989 // reset this Item entry 990 memset(items+idx, 0, sizeof(Item)); 991 992 // copy the item's name 993 items[idx].name=allocString(TRUE, strlen(name)); 994 strcpy(items[idx].name, name); 995 pathToTree(items[idx].name); 996 } else { 997 // same-name item found, replace it 998 if(items[idx].isDataOwned) { 999 free(items[idx].data); 1000 } 1001 1002 // keep the item's name since it is the same 1003 } 1004 1005 // set the item's data 1006 items[idx].data=data; 1007 items[idx].length=length; 1008 items[idx].isDataOwned=isDataOwned; 1009 items[idx].type=type; 1010} 1011 1012void 1013Package::addFile(const char *filesPath, const char *name) { 1014 uint8_t *data; 1015 int32_t length; 1016 char type; 1017 1018 data=readFile(filesPath, name, length, type); 1019 // readFile() exits the tool if it fails 1020 addItem(name, data, length, TRUE, type); 1021} 1022 1023void 1024Package::addItems(const Package &listPkg) { 1025 const Item *pItem; 1026 int32_t i; 1027 1028 for(pItem=listPkg.items, i=0; i<listPkg.itemCount; ++pItem, ++i) { 1029 addItem(pItem->name, pItem->data, pItem->length, FALSE, pItem->type); 1030 } 1031} 1032 1033void 1034Package::removeItem(int32_t idx) { 1035 if(idx>=0) { 1036 // remove the item 1037 if(items[idx].isDataOwned) { 1038 free(items[idx].data); 1039 } 1040 1041 // move the following items up 1042 if((idx+1)<itemCount) { 1043 memmove(items+idx, items+idx+1, (itemCount-(idx+1))*sizeof(Item)); 1044 } 1045 --itemCount; 1046 1047 if(idx<=findNextIndex) { 1048 --findNextIndex; 1049 } 1050 } 1051} 1052 1053void 1054Package::removeItems(const char *pattern) { 1055 int32_t idx; 1056 1057 findItems(pattern); 1058 while((idx=findNextItem())>=0) { 1059 removeItem(idx); 1060 } 1061} 1062 1063void 1064Package::removeItems(const Package &listPkg) { 1065 const Item *pItem; 1066 int32_t i; 1067 1068 for(pItem=listPkg.items, i=0; i<listPkg.itemCount; ++pItem, ++i) { 1069 removeItems(pItem->name); 1070 } 1071} 1072 1073void 1074Package::extractItem(const char *filesPath, const char *outName, int32_t idx, char outType) { 1075 char filename[1024]; 1076 UDataSwapper *ds; 1077 FILE *file; 1078 Item *pItem; 1079 int32_t fileLength; 1080 uint8_t itemCharset, outCharset; 1081 UBool itemIsBigEndian, outIsBigEndian; 1082 1083 if(idx<0 || itemCount<=idx) { 1084 return; 1085 } 1086 pItem=items+idx; 1087 1088 // swap the data to the outType 1089 // outType==0: don't swap 1090 if(outType!=0 && pItem->type!=outType) { 1091 // open the swapper 1092 UErrorCode errorCode=U_ZERO_ERROR; 1093 makeTypeProps(pItem->type, itemCharset, itemIsBigEndian); 1094 makeTypeProps(outType, outCharset, outIsBigEndian); 1095 ds=udata_openSwapper(itemIsBigEndian, itemCharset, outIsBigEndian, outCharset, &errorCode); 1096 if(U_FAILURE(errorCode)) { 1097 fprintf(stderr, "icupkg: udata_openSwapper(item %ld) failed - %s\n", 1098 (long)idx, u_errorName(errorCode)); 1099 exit(errorCode); 1100 } 1101 1102 ds->printError=printPackageError; 1103 ds->printErrorContext=stderr; 1104 1105 // swap the item from its platform properties to the desired ones 1106 udata_swap(ds, pItem->data, pItem->length, pItem->data, &errorCode); 1107 if(U_FAILURE(errorCode)) { 1108 fprintf(stderr, "icupkg: udata_swap(item %ld) failed - %s\n", (long)idx, u_errorName(errorCode)); 1109 exit(errorCode); 1110 } 1111 udata_closeSwapper(ds); 1112 pItem->type=outType; 1113 } 1114 1115 // create the file and write its contents 1116 makeFullFilenameAndDirs(filesPath, outName, filename, (int32_t)sizeof(filename)); 1117 file=fopen(filename, "wb"); 1118 if(file==NULL) { 1119 fprintf(stderr, "icupkg: unable to create file \"%s\"\n", filename); 1120 exit(U_FILE_ACCESS_ERROR); 1121 } 1122 fileLength=(int32_t)fwrite(pItem->data, 1, pItem->length, file); 1123 1124 if(ferror(file) || fileLength!=pItem->length) { 1125 fprintf(stderr, "icupkg: unable to write complete file \"%s\"\n", filename); 1126 exit(U_FILE_ACCESS_ERROR); 1127 } 1128 fclose(file); 1129} 1130 1131void 1132Package::extractItem(const char *filesPath, int32_t idx, char outType) { 1133 extractItem(filesPath, items[idx].name, idx, outType); 1134} 1135 1136void 1137Package::extractItems(const char *filesPath, const char *pattern, char outType) { 1138 int32_t idx; 1139 1140 findItems(pattern); 1141 while((idx=findNextItem())>=0) { 1142 extractItem(filesPath, idx, outType); 1143 } 1144} 1145 1146void 1147Package::extractItems(const char *filesPath, const Package &listPkg, char outType) { 1148 const Item *pItem; 1149 int32_t i; 1150 1151 for(pItem=listPkg.items, i=0; i<listPkg.itemCount; ++pItem, ++i) { 1152 extractItems(filesPath, pItem->name, outType); 1153 } 1154} 1155 1156int32_t 1157Package::getItemCount() const { 1158 return itemCount; 1159} 1160 1161const Item * 1162Package::getItem(int32_t idx) const { 1163 if (0 <= idx && idx < itemCount) { 1164 return &items[idx]; 1165 } 1166 return NULL; 1167} 1168 1169void 1170Package::checkDependency(void *context, const char *itemName, const char *targetName) { 1171 // check dependency: make sure the target item is in the package 1172 Package *me=(Package *)context; 1173 if(me->findItem(targetName)<0) { 1174 me->isMissingItems=TRUE; 1175 fprintf(stderr, "Item %s depends on missing item %s\n", itemName, targetName); 1176 } 1177} 1178 1179UBool 1180Package::checkDependencies() { 1181 isMissingItems=FALSE; 1182 enumDependencies(this, checkDependency); 1183 return (UBool)!isMissingItems; 1184} 1185 1186void 1187Package::enumDependencies(void *context, CheckDependency check) { 1188 int32_t i; 1189 1190 for(i=0; i<itemCount; ++i) { 1191 enumDependencies(items+i, context, check); 1192 } 1193} 1194 1195char * 1196Package::allocString(UBool in, int32_t length) { 1197 char *p; 1198 int32_t top; 1199 1200 if(in) { 1201 top=inStringTop; 1202 p=inStrings+top; 1203 } else { 1204 top=outStringTop; 1205 p=outStrings+top; 1206 } 1207 top+=length+1; 1208 1209 if(top>STRING_STORE_SIZE) { 1210 fprintf(stderr, "icupkg: string storage overflow\n"); 1211 exit(U_BUFFER_OVERFLOW_ERROR); 1212 } 1213 if(in) { 1214 inStringTop=top; 1215 } else { 1216 outStringTop=top; 1217 } 1218 return p; 1219} 1220 1221void 1222Package::sortItems() { 1223 UErrorCode errorCode=U_ZERO_ERROR; 1224 uprv_sortArray(items, itemCount, (int32_t)sizeof(Item), compareItems, NULL, FALSE, &errorCode); 1225 if(U_FAILURE(errorCode)) { 1226 fprintf(stderr, "icupkg: sorting item names failed - %s\n", u_errorName(errorCode)); 1227 exit(errorCode); 1228 } 1229} 1230 1231void Package::setItemCapacity(int32_t max) 1232{ 1233 if(max<=itemMax) { 1234 return; 1235 } 1236 Item *newItems = (Item*)uprv_malloc(max * sizeof(items[0])); 1237 Item *oldItems = items; 1238 if(newItems == NULL) { 1239 fprintf(stderr, "icupkg: Out of memory trying to allocate %lu bytes for %d items\n", max*sizeof(items[0]), max); 1240 exit(U_MEMORY_ALLOCATION_ERROR); 1241 } 1242 if(items && itemCount>0) { 1243 uprv_memcpy(newItems, items, itemCount*sizeof(items[0])); 1244 } 1245 itemMax = max; 1246 items = newItems; 1247 uprv_free(oldItems); 1248} 1249 1250void Package::ensureItemCapacity() 1251{ 1252 if((itemCount+1)>itemMax) { 1253 setItemCapacity(itemCount+kItemsChunk); 1254 } 1255} 1256 1257U_NAMESPACE_END 1258