package.cpp revision 85bf2e2fbc60a9f938064abc8127d61da7d19882
1/* 2******************************************************************************* 3* 4* Copyright (C) 1999-2009, International Business Machines 5* Corporation and others. All Rights Reserved. 6* 7******************************************************************************* 8* file name: package.cpp 9* encoding: US-ASCII 10* tab size: 8 (not used) 11* indentation:4 12* 13* created on: 2005aug25 14* created by: Markus W. Scherer 15* 16* Read, modify, and write ICU .dat data package files. 17* This is an integral part of the icupkg tool, moved to the toolutil library 18* because parts of tool implementations tend to be later shared by 19* other tools. 20* Subsumes functionality and implementation code from 21* gencmn, decmn, and icuswap tools. 22*/ 23 24#include "unicode/utypes.h" 25#include "unicode/putil.h" 26#include "unicode/udata.h" 27#include "cstring.h" 28#include "uarrsort.h" 29#include "ucmndata.h" 30#include "udataswp.h" 31#include "swapimpl.h" 32#include "toolutil.h" 33#include "package.h" 34 35#include <stdio.h> 36#include <stdlib.h> 37#include <string.h> 38 39// general definitions ----------------------------------------------------- *** 40 41#define LENGTHOF(array) (int32_t)(sizeof(array)/sizeof((array)[0])) 42 43/* UDataInfo cf. udata.h */ 44static const UDataInfo dataInfo={ 45 (uint16_t)sizeof(UDataInfo), 46 0, 47 48 U_IS_BIG_ENDIAN, 49 U_CHARSET_FAMILY, 50 (uint8_t)sizeof(UChar), 51 0, 52 53 {0x43, 0x6d, 0x6e, 0x44}, /* dataFormat="CmnD" */ 54 {1, 0, 0, 0}, /* formatVersion */ 55 {3, 0, 0, 0} /* dataVersion */ 56}; 57 58U_CDECL_BEGIN 59static void U_CALLCONV 60printPackageError(void *context, const char *fmt, va_list args) { 61 vfprintf((FILE *)context, fmt, args); 62} 63U_CDECL_END 64 65static uint16_t 66readSwapUInt16(uint16_t x) { 67 return (uint16_t)((x<<8)|(x>>8)); 68} 69 70// platform types ---------------------------------------------------------- *** 71 72static const char *types="lb?e"; 73 74enum { TYPE_L, TYPE_B, TYPE_LE, TYPE_E, TYPE_COUNT }; 75 76static inline int32_t 77makeTypeEnum(uint8_t charset, UBool isBigEndian) { 78 return 2*(int32_t)charset+isBigEndian; 79} 80 81static inline int32_t 82makeTypeEnum(char type) { 83 return 84 type == 'l' ? TYPE_L : 85 type == 'b' ? TYPE_B : 86 type == 'e' ? TYPE_E : 87 -1; 88} 89 90static inline char 91makeTypeLetter(uint8_t charset, UBool isBigEndian) { 92 return types[makeTypeEnum(charset, isBigEndian)]; 93} 94 95static inline char 96makeTypeLetter(int32_t typeEnum) { 97 return types[typeEnum]; 98} 99 100static void 101makeTypeProps(char type, uint8_t &charset, UBool &isBigEndian) { 102 int32_t typeEnum=makeTypeEnum(type); 103 charset=(uint8_t)(typeEnum>>1); 104 isBigEndian=(UBool)(typeEnum&1); 105} 106 107U_CFUNC const UDataInfo * 108getDataInfo(const uint8_t *data, int32_t length, 109 int32_t &infoLength, int32_t &headerLength, 110 UErrorCode *pErrorCode) { 111 const DataHeader *pHeader; 112 const UDataInfo *pInfo; 113 114 if(pErrorCode==NULL || U_FAILURE(*pErrorCode)) { 115 return NULL; 116 } 117 if( data==NULL || 118 (length>=0 && length<(int32_t)sizeof(DataHeader)) 119 ) { 120 *pErrorCode=U_ILLEGAL_ARGUMENT_ERROR; 121 return NULL; 122 } 123 124 pHeader=(const DataHeader *)data; 125 pInfo=&pHeader->info; 126 if( (length>=0 && length<(int32_t)sizeof(DataHeader)) || 127 pHeader->dataHeader.magic1!=0xda || 128 pHeader->dataHeader.magic2!=0x27 || 129 pInfo->sizeofUChar!=2 130 ) { 131 *pErrorCode=U_UNSUPPORTED_ERROR; 132 return NULL; 133 } 134 135 if(pInfo->isBigEndian==U_IS_BIG_ENDIAN) { 136 headerLength=pHeader->dataHeader.headerSize; 137 infoLength=pInfo->size; 138 } else { 139 headerLength=readSwapUInt16(pHeader->dataHeader.headerSize); 140 infoLength=readSwapUInt16(pInfo->size); 141 } 142 143 if( headerLength<(int32_t)sizeof(DataHeader) || 144 infoLength<(int32_t)sizeof(UDataInfo) || 145 headerLength<(int32_t)(sizeof(pHeader->dataHeader)+infoLength) || 146 (length>=0 && length<headerLength) 147 ) { 148 *pErrorCode=U_UNSUPPORTED_ERROR; 149 return NULL; 150 } 151 152 return pInfo; 153} 154 155static int32_t 156getTypeEnumForInputData(const uint8_t *data, int32_t length, 157 UErrorCode *pErrorCode) { 158 const UDataInfo *pInfo; 159 int32_t infoLength, headerLength; 160 161 /* getDataInfo() checks for illegal arguments */ 162 pInfo=getDataInfo(data, length, infoLength, headerLength, pErrorCode); 163 if(pInfo==NULL) { 164 return -1; 165 } 166 167 return makeTypeEnum(pInfo->charsetFamily, (UBool)pInfo->isBigEndian); 168} 169 170// file handling ----------------------------------------------------------- *** 171 172static void 173extractPackageName(const char *filename, 174 char pkg[], int32_t capacity) { 175 const char *basename; 176 int32_t len; 177 178 basename=findBasename(filename); 179 len=(int32_t)strlen(basename)-4; /* -4: subtract the length of ".dat" */ 180 181 if(len<=0 || 0!=strcmp(basename+len, ".dat")) { 182 fprintf(stderr, "icupkg: \"%s\" is not recognized as a package filename (must end with .dat)\n", 183 basename); 184 exit(U_ILLEGAL_ARGUMENT_ERROR); 185 } 186 187 if(len>=capacity) { 188 fprintf(stderr, "icupkg: the package name \"%s\" is too long (>=%ld)\n", 189 basename, (long)capacity); 190 exit(U_ILLEGAL_ARGUMENT_ERROR); 191 } 192 193 memcpy(pkg, basename, len); 194 pkg[len]=0; 195} 196 197static int32_t 198getFileLength(FILE *f) { 199 int32_t length; 200 201 fseek(f, 0, SEEK_END); 202 length=(int32_t)ftell(f); 203 fseek(f, 0, SEEK_SET); 204 return length; 205} 206 207/* 208 * Turn tree separators and alternate file separators into normal file separators. 209 */ 210#if U_TREE_ENTRY_SEP_CHAR==U_FILE_SEP_CHAR && U_FILE_ALT_SEP_CHAR==U_FILE_SEP_CHAR 211#define treeToPath(s) 212#else 213static void 214treeToPath(char *s) { 215 char *t; 216 217 for(t=s; *t!=0; ++t) { 218 if(*t==U_TREE_ENTRY_SEP_CHAR || *t==U_FILE_ALT_SEP_CHAR) { 219 *t=U_FILE_SEP_CHAR; 220 } 221 } 222} 223#endif 224 225/* 226 * Turn file separators into tree separators. 227 */ 228#if U_TREE_ENTRY_SEP_CHAR==U_FILE_SEP_CHAR && U_FILE_ALT_SEP_CHAR==U_FILE_SEP_CHAR 229#define pathToTree(s) 230#else 231static void 232pathToTree(char *s) { 233 char *t; 234 235 for(t=s; *t!=0; ++t) { 236 if(*t==U_FILE_SEP_CHAR || *t==U_FILE_ALT_SEP_CHAR) { 237 *t=U_TREE_ENTRY_SEP_CHAR; 238 } 239 } 240} 241#endif 242 243/* 244 * Prepend the path (if any) to the name and run the name through treeToName(). 245 */ 246static void 247makeFullFilename(const char *path, const char *name, 248 char *filename, int32_t capacity) { 249 char *s; 250 251 // prepend the path unless NULL or empty 252 if(path!=NULL && path[0]!=0) { 253 if((int32_t)(strlen(path)+1)>=capacity) { 254 fprintf(stderr, "pathname too long: \"%s\"\n", path); 255 exit(U_BUFFER_OVERFLOW_ERROR); 256 } 257 strcpy(filename, path); 258 259 // make sure the path ends with a file separator 260 s=strchr(filename, 0); 261 if(*(s-1)!=U_FILE_SEP_CHAR && *(s-1)!=U_FILE_ALT_SEP_CHAR) { 262 *s++=U_FILE_SEP_CHAR; 263 } 264 } else { 265 s=filename; 266 } 267 268 // turn the name into a filename, turn tree separators into file separators 269 if((int32_t)((s-filename)+strlen(name))>=capacity) { 270 fprintf(stderr, "path/filename too long: \"%s%s\"\n", filename, name); 271 exit(U_BUFFER_OVERFLOW_ERROR); 272 } 273 strcpy(s, name); 274 treeToPath(s); 275} 276 277static void 278makeFullFilenameAndDirs(const char *path, const char *name, 279 char *filename, int32_t capacity) { 280 char *sep; 281 UErrorCode errorCode; 282 283 makeFullFilename(path, name, filename, capacity); 284 285 // make tree directories 286 errorCode=U_ZERO_ERROR; 287 sep=strchr(filename, 0)-strlen(name); 288 while((sep=strchr(sep, U_FILE_SEP_CHAR))!=NULL) { 289 if(sep!=filename) { 290 *sep=0; // truncate temporarily 291 uprv_mkdir(filename, &errorCode); 292 if(U_FAILURE(errorCode)) { 293 fprintf(stderr, "icupkg: unable to create tree directory \"%s\"\n", filename); 294 exit(U_FILE_ACCESS_ERROR); 295 } 296 } 297 *sep++=U_FILE_SEP_CHAR; // restore file separator character 298 } 299} 300 301static uint8_t * 302readFile(const char *path, const char *name, int32_t &length, char &type) { 303 char filename[1024]; 304 FILE *file; 305 uint8_t *data; 306 UErrorCode errorCode; 307 int32_t fileLength, typeEnum; 308 309 makeFullFilename(path, name, filename, (int32_t)sizeof(filename)); 310 311 /* open the input file, get its length, allocate memory for it, read the file */ 312 file=fopen(filename, "rb"); 313 if(file==NULL) { 314 fprintf(stderr, "icupkg: unable to open input file \"%s\"\n", filename); 315 exit(U_FILE_ACCESS_ERROR); 316 } 317 318 /* get the file length */ 319 fileLength=getFileLength(file); 320 if(ferror(file) || fileLength<=0) { 321 fprintf(stderr, "icupkg: empty input file \"%s\"\n", filename); 322 fclose(file); 323 exit(U_FILE_ACCESS_ERROR); 324 } 325 326 /* allocate the buffer, pad to multiple of 16 */ 327 length=(fileLength+0xf)&~0xf; 328 data=(uint8_t *)malloc(length); 329 if(data==NULL) { 330 fclose(file); 331 exit(U_MEMORY_ALLOCATION_ERROR); 332 } 333 334 /* read the file */ 335 if(fileLength!=(int32_t)fread(data, 1, fileLength, file)) { 336 fprintf(stderr, "icupkg: error reading \"%s\"\n", filename); 337 fclose(file); 338 free(data); 339 exit(U_FILE_ACCESS_ERROR); 340 } 341 342 /* pad the file to a multiple of 16 using the usual padding byte */ 343 if(fileLength<length) { 344 memset(data+fileLength, 0xaa, length-fileLength); 345 } 346 347 fclose(file); 348 349 // minimum check for ICU-format data 350 errorCode=U_ZERO_ERROR; 351 typeEnum=getTypeEnumForInputData(data, length, &errorCode); 352 if(typeEnum<0 || U_FAILURE(errorCode)) { 353 fprintf(stderr, "icupkg: not an ICU data file: \"%s\"\n", filename); 354 free(data); 355 exit(U_INVALID_FORMAT_ERROR); 356 } 357 type=makeTypeLetter(typeEnum); 358 359 return data; 360} 361 362// .dat package file representation ---------------------------------------- *** 363 364U_CDECL_BEGIN 365 366static int32_t U_CALLCONV 367compareItems(const void * /*context*/, const void *left, const void *right) { 368 U_NAMESPACE_USE 369 370 return (int32_t)strcmp(((Item *)left)->name, ((Item *)right)->name); 371} 372 373U_CDECL_END 374 375U_NAMESPACE_BEGIN 376 377Package::Package() { 378 inPkgName[0]=0; 379 inData=NULL; 380 inLength=0; 381 inCharset=U_CHARSET_FAMILY; 382 inIsBigEndian=U_IS_BIG_ENDIAN; 383 384 itemCount=0; 385 inStringTop=outStringTop=0; 386 387 matchMode=0; 388 findPrefix=findSuffix=NULL; 389 findPrefixLength=findSuffixLength=0; 390 findNextIndex=-1; 391 392 // create a header for an empty package 393 DataHeader *pHeader; 394 pHeader=(DataHeader *)header; 395 pHeader->dataHeader.magic1=0xda; 396 pHeader->dataHeader.magic2=0x27; 397 memcpy(&pHeader->info, &dataInfo, sizeof(dataInfo)); 398 headerLength=(int32_t)(4+sizeof(dataInfo)); 399 if(headerLength&0xf) { 400 /* NUL-pad the header to a multiple of 16 */ 401 int32_t length=(headerLength+0xf)&~0xf; 402 memset(header+headerLength, 0, length-headerLength); 403 headerLength=length; 404 } 405 pHeader->dataHeader.headerSize=(uint16_t)headerLength; 406} 407 408Package::~Package() { 409 int32_t idx; 410 411 free(inData); 412 413 for(idx=0; idx<itemCount; ++idx) { 414 if(items[idx].isDataOwned) { 415 free(items[idx].data); 416 } 417 } 418} 419 420void 421Package::readPackage(const char *filename) { 422 UDataSwapper *ds; 423 const UDataInfo *pInfo; 424 UErrorCode errorCode; 425 426 const uint8_t *inBytes; 427 428 int32_t length, offset, i; 429 int32_t itemLength, typeEnum; 430 char type; 431 432 const UDataOffsetTOCEntry *inEntries; 433 434 extractPackageName(filename, inPkgName, (int32_t)sizeof(inPkgName)); 435 436 /* read the file */ 437 inData=readFile(NULL, filename, inLength, type); 438 length=inLength; 439 440 /* 441 * swap the header - even if the swapping itself is a no-op 442 * because it tells us the header length 443 */ 444 errorCode=U_ZERO_ERROR; 445 makeTypeProps(type, inCharset, inIsBigEndian); 446 ds=udata_openSwapper(inIsBigEndian, inCharset, U_IS_BIG_ENDIAN, U_CHARSET_FAMILY, &errorCode); 447 if(U_FAILURE(errorCode)) { 448 fprintf(stderr, "icupkg: udata_openSwapper(\"%s\") failed - %s\n", 449 filename, u_errorName(errorCode)); 450 exit(errorCode); 451 } 452 453 ds->printError=printPackageError; 454 ds->printErrorContext=stderr; 455 456 headerLength=sizeof(header); 457 if(length<headerLength) { 458 headerLength=length; 459 } 460 headerLength=udata_swapDataHeader(ds, inData, headerLength, header, &errorCode); 461 if(U_FAILURE(errorCode)) { 462 exit(errorCode); 463 } 464 465 /* check data format and format version */ 466 pInfo=(const UDataInfo *)((const char *)inData+4); 467 if(!( 468 pInfo->dataFormat[0]==0x43 && /* dataFormat="CmnD" */ 469 pInfo->dataFormat[1]==0x6d && 470 pInfo->dataFormat[2]==0x6e && 471 pInfo->dataFormat[3]==0x44 && 472 pInfo->formatVersion[0]==1 473 )) { 474 fprintf(stderr, "icupkg: data format %02x.%02x.%02x.%02x (format version %02x) is not recognized as an ICU .dat package\n", 475 pInfo->dataFormat[0], pInfo->dataFormat[1], 476 pInfo->dataFormat[2], pInfo->dataFormat[3], 477 pInfo->formatVersion[0]); 478 exit(U_UNSUPPORTED_ERROR); 479 } 480 inIsBigEndian=(UBool)pInfo->isBigEndian; 481 inCharset=pInfo->charsetFamily; 482 483 inBytes=(const uint8_t *)inData+headerLength; 484 inEntries=(const UDataOffsetTOCEntry *)(inBytes+4); 485 486 /* check that the itemCount fits, then the ToC table, then at least the header of the last item */ 487 length-=headerLength; 488 if(length<4) { 489 /* itemCount does not fit */ 490 offset=0x7fffffff; 491 } else { 492 itemCount=udata_readInt32(ds, *(const int32_t *)inBytes); 493 if(itemCount==0) { 494 offset=4; 495 } else if(length<(4+8*itemCount)) { 496 /* ToC table does not fit */ 497 offset=0x7fffffff; 498 } else { 499 /* offset of the last item plus at least 20 bytes for its header */ 500 offset=20+(int32_t)ds->readUInt32(inEntries[itemCount-1].dataOffset); 501 } 502 } 503 if(length<offset) { 504 fprintf(stderr, "icupkg: too few bytes (%ld after header) for a .dat package\n", 505 (long)length); 506 exit(U_INDEX_OUTOFBOUNDS_ERROR); 507 } 508 /* do not modify the package length variable until the last item's length is set */ 509 510 if(itemCount>0) { 511 char prefix[MAX_PKG_NAME_LENGTH+4]; 512 char *s, *inItemStrings; 513 int32_t inPkgNameLength, prefixLength, stringsOffset; 514 515 if(itemCount>MAX_FILE_COUNT) { 516 fprintf(stderr, "icupkg: too many items, maximum is %d\n", MAX_FILE_COUNT); 517 exit(U_BUFFER_OVERFLOW_ERROR); 518 } 519 520 /* swap the item name strings */ 521 stringsOffset=4+8*itemCount; 522 itemLength=(int32_t)(ds->readUInt32(inEntries[0].dataOffset))-stringsOffset; 523 524 // don't include padding bytes at the end of the item names 525 while(itemLength>0 && inBytes[stringsOffset+itemLength-1]!=0) { 526 --itemLength; 527 } 528 529 if((inStringTop+itemLength)>STRING_STORE_SIZE) { 530 fprintf(stderr, "icupkg: total length of item name strings too long\n"); 531 exit(U_BUFFER_OVERFLOW_ERROR); 532 } 533 534 inItemStrings=inStrings+inStringTop; 535 ds->swapInvChars(ds, inBytes+stringsOffset, itemLength, inItemStrings, &errorCode); 536 if(U_FAILURE(errorCode)) { 537 fprintf(stderr, "icupkg failed to swap the input .dat package item name strings\n"); 538 exit(U_INVALID_FORMAT_ERROR); 539 } 540 inStringTop+=itemLength; 541 542 // reset the Item entries 543 memset(items, 0, itemCount*sizeof(Item)); 544 545 inPkgNameLength=strlen(inPkgName); 546 memcpy(prefix, inPkgName, inPkgNameLength); 547 prefixLength=inPkgNameLength; 548 549 /* 550 * Get the common prefix of the items. 551 * New-style ICU .dat packages use tree separators ('/') between package names, 552 * tree names, and item names, 553 * while old-style ICU .dat packages (before multi-tree support) 554 * use an underscore ('_') between package and item names. 555 */ 556 offset=(int32_t)ds->readUInt32(inEntries[0].nameOffset)-stringsOffset; 557 s=inItemStrings+offset; 558 if( (int32_t)strlen(s)>=(inPkgNameLength+2) && 559 0==memcmp(s, inPkgName, inPkgNameLength) && 560 s[inPkgNameLength]=='_' 561 ) { 562 // old-style .dat package 563 prefix[prefixLength++]='_'; 564 } else { 565 // new-style .dat package 566 prefix[prefixLength++]=U_TREE_ENTRY_SEP_CHAR; 567 // if it turns out to not contain U_TREE_ENTRY_SEP_CHAR 568 // then the test in the loop below will fail 569 } 570 prefix[prefixLength]=0; 571 572 /* read the ToC table */ 573 for(i=0; i<itemCount; ++i) { 574 // skip the package part of the item name, error if it does not match the actual package name 575 // or if nothing follows the package name 576 offset=(int32_t)ds->readUInt32(inEntries[i].nameOffset)-stringsOffset; 577 s=inItemStrings+offset; 578 if(0!=strncmp(s, prefix, prefixLength) || s[prefixLength]==0) { 579 fprintf(stderr, "icupkg: input .dat item name \"%s\" does not start with \"%s\"\n", 580 s, prefix); 581 exit(U_UNSUPPORTED_ERROR); 582 } 583 items[i].name=s+prefixLength; 584 585 // set the item's data 586 items[i].data=(uint8_t *)inBytes+ds->readUInt32(inEntries[i].dataOffset); 587 if(i>0) { 588 items[i-1].length=(int32_t)(items[i].data-items[i-1].data); 589 590 // set the previous item's platform type 591 typeEnum=getTypeEnumForInputData(items[i-1].data, items[i-1].length, &errorCode); 592 if(typeEnum<0 || U_FAILURE(errorCode)) { 593 fprintf(stderr, "icupkg: not an ICU data file: item \"%s\" in \"%s\"\n", items[i-1].name, filename); 594 exit(U_INVALID_FORMAT_ERROR); 595 } 596 items[i-1].type=makeTypeLetter(typeEnum); 597 } 598 items[i].isDataOwned=FALSE; 599 } 600 // set the last item's length 601 items[itemCount-1].length=length-ds->readUInt32(inEntries[itemCount-1].dataOffset); 602 603 // set the last item's platform type 604 typeEnum=getTypeEnumForInputData(items[itemCount-1].data, items[itemCount-1].length, &errorCode); 605 if(typeEnum<0 || U_FAILURE(errorCode)) { 606 fprintf(stderr, "icupkg: not an ICU data file: item \"%s\" in \"%s\"\n", items[i-1].name, filename); 607 exit(U_INVALID_FORMAT_ERROR); 608 } 609 items[itemCount-1].type=makeTypeLetter(typeEnum); 610 611 if(type!=U_ICUDATA_TYPE_LETTER[0]) { 612 // sort the item names for the local charset 613 sortItems(); 614 } 615 } 616 617 udata_closeSwapper(ds); 618} 619 620char 621Package::getInType() { 622 return makeTypeLetter(inCharset, inIsBigEndian); 623} 624 625void 626Package::writePackage(const char *filename, char outType, const char *comment) { 627 char prefix[MAX_PKG_NAME_LENGTH+4]; 628 UDataOffsetTOCEntry entry; 629 UDataSwapper *dsLocalToOut, *ds[TYPE_COUNT]; 630 FILE *file; 631 Item *pItem; 632 char *name; 633 UErrorCode errorCode; 634 int32_t i, length, prefixLength, maxItemLength, basenameOffset, offset, outInt32; 635 uint8_t outCharset; 636 UBool outIsBigEndian; 637 638 extractPackageName(filename, prefix, MAX_PKG_NAME_LENGTH); 639 640 // if there is an explicit comment, then use it, else use what's in the current header 641 if(comment!=NULL) { 642 /* get the header size minus the current comment */ 643 DataHeader *pHeader; 644 int32_t length; 645 646 pHeader=(DataHeader *)header; 647 headerLength=4+pHeader->info.size; 648 length=(int32_t)strlen(comment); 649 if((int32_t)(headerLength+length)>=(int32_t)sizeof(header)) { 650 fprintf(stderr, "icupkg: comment too long\n"); 651 exit(U_BUFFER_OVERFLOW_ERROR); 652 } 653 memcpy(header+headerLength, comment, length+1); 654 headerLength+=length; 655 if(headerLength&0xf) { 656 /* NUL-pad the header to a multiple of 16 */ 657 length=(headerLength+0xf)&~0xf; 658 memset(header+headerLength, 0, length-headerLength); 659 headerLength=length; 660 } 661 pHeader->dataHeader.headerSize=(uint16_t)headerLength; 662 } 663 664 makeTypeProps(outType, outCharset, outIsBigEndian); 665 666 // open (TYPE_COUNT-2) swappers 667 // one is a no-op for local type==outType 668 // one type (TYPE_LE) is bogus 669 errorCode=U_ZERO_ERROR; 670 i=makeTypeEnum(outType); 671 ds[TYPE_B]= i==TYPE_B ? NULL : udata_openSwapper(TRUE, U_ASCII_FAMILY, outIsBigEndian, outCharset, &errorCode); 672 ds[TYPE_L]= i==TYPE_L ? NULL : udata_openSwapper(FALSE, U_ASCII_FAMILY, outIsBigEndian, outCharset, &errorCode); 673 ds[TYPE_LE]=NULL; 674 ds[TYPE_E]= i==TYPE_E ? NULL : udata_openSwapper(TRUE, U_EBCDIC_FAMILY, outIsBigEndian, outCharset, &errorCode); 675 if(U_FAILURE(errorCode)) { 676 fprintf(stderr, "icupkg: udata_openSwapper() failed - %s\n", u_errorName(errorCode)); 677 exit(errorCode); 678 } 679 for(i=0; i<TYPE_COUNT; ++i) { 680 if(ds[i]!=NULL) { 681 ds[i]->printError=printPackageError; 682 ds[i]->printErrorContext=stderr; 683 } 684 } 685 686 dsLocalToOut=ds[makeTypeEnum(U_CHARSET_FAMILY, U_IS_BIG_ENDIAN)]; 687 688 // create the file and write its contents 689 file=fopen(filename, "wb"); 690 if(file==NULL) { 691 fprintf(stderr, "icupkg: unable to create file \"%s\"\n", filename); 692 exit(U_FILE_ACCESS_ERROR); 693 } 694 695 // swap and write the header 696 if(dsLocalToOut!=NULL) { 697 udata_swapDataHeader(dsLocalToOut, header, headerLength, header, &errorCode); 698 if(U_FAILURE(errorCode)) { 699 fprintf(stderr, "icupkg: udata_swapDataHeader(local to out) failed - %s\n", u_errorName(errorCode)); 700 exit(errorCode); 701 } 702 } 703 length=(int32_t)fwrite(header, 1, headerLength, file); 704 if(length!=headerLength) { 705 fprintf(stderr, "icupkg: unable to write complete header to file \"%s\"\n", filename); 706 exit(U_FILE_ACCESS_ERROR); 707 } 708 709 // prepare and swap the package name with a tree separator 710 // for prepending to item names 711 strcat(prefix, U_TREE_ENTRY_SEP_STRING); 712 prefixLength=(int32_t)strlen(prefix); 713 if(dsLocalToOut!=NULL) { 714 dsLocalToOut->swapInvChars(dsLocalToOut, prefix, prefixLength, prefix, &errorCode); 715 if(U_FAILURE(errorCode)) { 716 fprintf(stderr, "icupkg: swapInvChars(output package name) failed - %s\n", u_errorName(errorCode)); 717 exit(errorCode); 718 } 719 720 // swap and sort the item names (sorting needs to be done in the output charset) 721 dsLocalToOut->swapInvChars(dsLocalToOut, inStrings, inStringTop, inStrings, &errorCode); 722 if(U_FAILURE(errorCode)) { 723 fprintf(stderr, "icupkg: swapInvChars(item names) failed - %s\n", u_errorName(errorCode)); 724 exit(errorCode); 725 } 726 sortItems(); 727 } 728 729 // create the output item names in sorted order, with the package name prepended to each 730 for(i=0; i<itemCount; ++i) { 731 length=(int32_t)strlen(items[i].name); 732 name=allocString(FALSE, length+prefixLength); 733 memcpy(name, prefix, prefixLength); 734 memcpy(name+prefixLength, items[i].name, length+1); 735 items[i].name=name; 736 } 737 738 // calculate offsets for item names and items, pad to 16-align items 739 // align only the first item; each item's length is a multiple of 16 740 basenameOffset=4+8*itemCount; 741 offset=basenameOffset+outStringTop; 742 if((length=(offset&15))!=0) { 743 length=16-length; 744 memset(allocString(FALSE, length-1), 0xaa, length); 745 offset+=length; 746 } 747 748 // write the table of contents 749 // first the itemCount 750 outInt32=itemCount; 751 if(dsLocalToOut!=NULL) { 752 dsLocalToOut->swapArray32(dsLocalToOut, &outInt32, 4, &outInt32, &errorCode); 753 if(U_FAILURE(errorCode)) { 754 fprintf(stderr, "icupkg: swapArray32(item count) failed - %s\n", u_errorName(errorCode)); 755 exit(errorCode); 756 } 757 } 758 length=(int32_t)fwrite(&outInt32, 1, 4, file); 759 if(length!=4) { 760 fprintf(stderr, "icupkg: unable to write complete item count to file \"%s\"\n", filename); 761 exit(U_FILE_ACCESS_ERROR); 762 } 763 764 // then write the item entries (and collect the maxItemLength) 765 maxItemLength=0; 766 for(i=0; i<itemCount; ++i) { 767 entry.nameOffset=(uint32_t)(basenameOffset+(items[i].name-outStrings)); 768 entry.dataOffset=(uint32_t)offset; 769 if(dsLocalToOut!=NULL) { 770 dsLocalToOut->swapArray32(dsLocalToOut, &entry, 8, &entry, &errorCode); 771 if(U_FAILURE(errorCode)) { 772 fprintf(stderr, "icupkg: swapArray32(item entry %ld) failed - %s\n", (long)i, u_errorName(errorCode)); 773 exit(errorCode); 774 } 775 } 776 length=(int32_t)fwrite(&entry, 1, 8, file); 777 if(length!=8) { 778 fprintf(stderr, "icupkg: unable to write complete item entry %ld to file \"%s\"\n", (long)i, filename); 779 exit(U_FILE_ACCESS_ERROR); 780 } 781 782 length=items[i].length; 783 if(length>maxItemLength) { 784 maxItemLength=length; 785 } 786 offset+=length; 787 } 788 789 // write the item names 790 length=(int32_t)fwrite(outStrings, 1, outStringTop, file); 791 if(length!=outStringTop) { 792 fprintf(stderr, "icupkg: unable to write complete item names to file \"%s\"\n", filename); 793 exit(U_FILE_ACCESS_ERROR); 794 } 795 796 // write the items 797 for(pItem=items, i=0; i<itemCount; ++pItem, ++i) { 798 int32_t type=makeTypeEnum(pItem->type); 799 if(ds[type]!=NULL) { 800 // swap each item from its platform properties to the desired ones 801 udata_swap( 802 ds[type], 803 pItem->data, pItem->length, pItem->data, 804 &errorCode); 805 if(U_FAILURE(errorCode)) { 806 fprintf(stderr, "icupkg: udata_swap(item %ld) failed - %s\n", (long)i, u_errorName(errorCode)); 807 exit(errorCode); 808 } 809 } 810 length=(int32_t)fwrite(pItem->data, 1, pItem->length, file); 811 if(length!=pItem->length) { 812 fprintf(stderr, "icupkg: unable to write complete item %ld to file \"%s\"\n", (long)i, filename); 813 exit(U_FILE_ACCESS_ERROR); 814 } 815 } 816 817 if(ferror(file)) { 818 fprintf(stderr, "icupkg: unable to write complete file \"%s\"\n", filename); 819 exit(U_FILE_ACCESS_ERROR); 820 } 821 822 fclose(file); 823 for(i=0; i<TYPE_COUNT; ++i) { 824 udata_closeSwapper(ds[i]); 825 } 826} 827 828int32_t 829Package::findItem(const char *name, int32_t length) const { 830 int32_t i, start, limit; 831 int result; 832 833 /* do a binary search for the string */ 834 start=0; 835 limit=itemCount; 836 while(start<limit) { 837 i=(start+limit)/2; 838 if(length>=0) { 839 result=strncmp(name, items[i].name, length); 840 } else { 841 result=strcmp(name, items[i].name); 842 } 843 844 if(result==0) { 845 /* found */ 846 if(length>=0) { 847 /* 848 * if we compared just prefixes, then we may need to back up 849 * to the first item with this prefix 850 */ 851 while(i>0 && 0==strncmp(name, items[i-1].name, length)) { 852 --i; 853 } 854 } 855 return i; 856 } else if(result<0) { 857 limit=i; 858 } else /* result>0 */ { 859 start=i+1; 860 } 861 } 862 863 return ~start; /* not found, return binary-not of the insertion point */ 864} 865 866void 867Package::findItems(const char *pattern) { 868 const char *wild; 869 870 if(pattern==NULL || *pattern==0) { 871 findNextIndex=-1; 872 return; 873 } 874 875 findPrefix=pattern; 876 findSuffix=NULL; 877 findSuffixLength=0; 878 879 wild=strchr(pattern, '*'); 880 if(wild==NULL) { 881 // no wildcard 882 findPrefixLength=(int32_t)strlen(pattern); 883 } else { 884 // one wildcard 885 findPrefixLength=(int32_t)(wild-pattern); 886 findSuffix=wild+1; 887 findSuffixLength=(int32_t)strlen(findSuffix); 888 if(NULL!=strchr(findSuffix, '*')) { 889 // two or more wildcards 890 fprintf(stderr, "icupkg: syntax error (more than one '*') in item pattern \"%s\"\n", pattern); 891 exit(U_PARSE_ERROR); 892 } 893 } 894 895 if(findPrefixLength==0) { 896 findNextIndex=0; 897 } else { 898 findNextIndex=findItem(findPrefix, findPrefixLength); 899 } 900} 901 902int32_t 903Package::findNextItem() { 904 const char *name, *middle, *treeSep; 905 int32_t idx, nameLength, middleLength; 906 907 if(findNextIndex<0) { 908 return -1; 909 } 910 911 while(findNextIndex<itemCount) { 912 idx=findNextIndex++; 913 name=items[idx].name; 914 nameLength=(int32_t)strlen(name); 915 if(nameLength<(findPrefixLength+findSuffixLength)) { 916 // item name too short for prefix & suffix 917 continue; 918 } 919 if(findPrefixLength>0 && 0!=memcmp(findPrefix, name, findPrefixLength)) { 920 // left the range of names with this prefix 921 break; 922 } 923 middle=name+findPrefixLength; 924 middleLength=nameLength-findPrefixLength-findSuffixLength; 925 if(findSuffixLength>0 && 0!=memcmp(findSuffix, name+(nameLength-findSuffixLength), findSuffixLength)) { 926 // suffix does not match 927 continue; 928 } 929 // prefix & suffix match 930 931 if(matchMode&MATCH_NOSLASH) { 932 treeSep=strchr(middle, U_TREE_ENTRY_SEP_CHAR); 933 if(treeSep!=NULL && (treeSep-middle)<middleLength) { 934 // the middle (matching the * wildcard) contains a tree separator / 935 continue; 936 } 937 } 938 939 // found a matching item 940 return idx; 941 } 942 943 // no more items 944 findNextIndex=-1; 945 return -1; 946} 947 948void 949Package::setMatchMode(uint32_t mode) { 950 matchMode=mode; 951} 952 953void 954Package::addItem(const char *name) { 955 addItem(name, NULL, 0, FALSE, U_ICUDATA_TYPE_LETTER[0]); 956} 957 958void 959Package::addItem(const char *name, uint8_t *data, int32_t length, UBool isDataOwned, char type) { 960 int32_t idx; 961 962 idx=findItem(name); 963 if(idx<0) { 964 // new item, make space at the insertion point 965 if(itemCount>=MAX_FILE_COUNT) { 966 fprintf(stderr, "icupkg: too many items, maximum is %d\n", MAX_FILE_COUNT); 967 exit(U_BUFFER_OVERFLOW_ERROR); 968 } 969 // move the following items down 970 idx=~idx; 971 if(idx<itemCount) { 972 memmove(items+idx+1, items+idx, (itemCount-idx)*sizeof(Item)); 973 } 974 ++itemCount; 975 976 // reset this Item entry 977 memset(items+idx, 0, sizeof(Item)); 978 979 // copy the item's name 980 items[idx].name=allocString(TRUE, strlen(name)); 981 strcpy(items[idx].name, name); 982 pathToTree(items[idx].name); 983 } else { 984 // same-name item found, replace it 985 if(items[idx].isDataOwned) { 986 free(items[idx].data); 987 } 988 989 // keep the item's name since it is the same 990 } 991 992 // set the item's data 993 items[idx].data=data; 994 items[idx].length=length; 995 items[idx].isDataOwned=isDataOwned; 996 items[idx].type=type; 997} 998 999void 1000Package::addFile(const char *filesPath, const char *name) { 1001 uint8_t *data; 1002 int32_t length; 1003 char type; 1004 1005 data=readFile(filesPath, name, length, type); 1006 // readFile() exits the tool if it fails 1007 addItem(name, data, length, TRUE, type); 1008} 1009 1010void 1011Package::addItems(const Package &listPkg) { 1012 const Item *pItem; 1013 int32_t i; 1014 1015 for(pItem=listPkg.items, i=0; i<listPkg.itemCount; ++pItem, ++i) { 1016 addItem(pItem->name, pItem->data, pItem->length, FALSE, pItem->type); 1017 } 1018} 1019 1020void 1021Package::removeItem(int32_t idx) { 1022 if(idx>=0) { 1023 // remove the item 1024 if(items[idx].isDataOwned) { 1025 free(items[idx].data); 1026 } 1027 1028 // move the following items up 1029 if((idx+1)<itemCount) { 1030 memmove(items+idx, items+idx+1, (itemCount-(idx+1))*sizeof(Item)); 1031 } 1032 --itemCount; 1033 1034 if(idx<=findNextIndex) { 1035 --findNextIndex; 1036 } 1037 } 1038} 1039 1040void 1041Package::removeItems(const char *pattern) { 1042 int32_t idx; 1043 1044 findItems(pattern); 1045 while((idx=findNextItem())>=0) { 1046 removeItem(idx); 1047 } 1048} 1049 1050void 1051Package::removeItems(const Package &listPkg) { 1052 const Item *pItem; 1053 int32_t i; 1054 1055 for(pItem=listPkg.items, i=0; i<listPkg.itemCount; ++pItem, ++i) { 1056 removeItems(pItem->name); 1057 } 1058} 1059 1060void 1061Package::extractItem(const char *filesPath, const char *outName, int32_t idx, char outType) { 1062 char filename[1024]; 1063 UDataSwapper *ds; 1064 FILE *file; 1065 Item *pItem; 1066 int32_t fileLength; 1067 uint8_t itemCharset, outCharset; 1068 UBool itemIsBigEndian, outIsBigEndian; 1069 1070 if(idx<0 || itemCount<=idx) { 1071 return; 1072 } 1073 pItem=items+idx; 1074 1075 // swap the data to the outType 1076 // outType==0: don't swap 1077 if(outType!=0 && pItem->type!=outType) { 1078 // open the swapper 1079 UErrorCode errorCode=U_ZERO_ERROR; 1080 makeTypeProps(pItem->type, itemCharset, itemIsBigEndian); 1081 makeTypeProps(outType, outCharset, outIsBigEndian); 1082 ds=udata_openSwapper(itemIsBigEndian, itemCharset, outIsBigEndian, outCharset, &errorCode); 1083 if(U_FAILURE(errorCode)) { 1084 fprintf(stderr, "icupkg: udata_openSwapper(item %ld) failed - %s\n", 1085 (long)idx, u_errorName(errorCode)); 1086 exit(errorCode); 1087 } 1088 1089 ds->printError=printPackageError; 1090 ds->printErrorContext=stderr; 1091 1092 // swap the item from its platform properties to the desired ones 1093 udata_swap(ds, pItem->data, pItem->length, pItem->data, &errorCode); 1094 if(U_FAILURE(errorCode)) { 1095 fprintf(stderr, "icupkg: udata_swap(item %ld) failed - %s\n", (long)idx, u_errorName(errorCode)); 1096 exit(errorCode); 1097 } 1098 udata_closeSwapper(ds); 1099 } 1100 1101 // create the file and write its contents 1102 makeFullFilenameAndDirs(filesPath, outName, filename, (int32_t)sizeof(filename)); 1103 file=fopen(filename, "wb"); 1104 if(file==NULL) { 1105 fprintf(stderr, "icupkg: unable to create file \"%s\"\n", filename); 1106 exit(U_FILE_ACCESS_ERROR); 1107 } 1108 fileLength=(int32_t)fwrite(pItem->data, 1, pItem->length, file); 1109 1110 if(ferror(file) || fileLength!=pItem->length) { 1111 fprintf(stderr, "icupkg: unable to write complete file \"%s\"\n", filename); 1112 exit(U_FILE_ACCESS_ERROR); 1113 } 1114 fclose(file); 1115} 1116 1117void 1118Package::extractItem(const char *filesPath, int32_t idx, char outType) { 1119 extractItem(filesPath, items[idx].name, idx, outType); 1120} 1121 1122void 1123Package::extractItems(const char *filesPath, const char *pattern, char outType) { 1124 int32_t idx; 1125 1126 findItems(pattern); 1127 while((idx=findNextItem())>=0) { 1128 extractItem(filesPath, idx, outType); 1129 } 1130} 1131 1132void 1133Package::extractItems(const char *filesPath, const Package &listPkg, char outType) { 1134 const Item *pItem; 1135 int32_t i; 1136 1137 for(pItem=listPkg.items, i=0; i<listPkg.itemCount; ++pItem, ++i) { 1138 extractItems(filesPath, pItem->name, outType); 1139 } 1140} 1141 1142int32_t 1143Package::getItemCount() const { 1144 return itemCount; 1145} 1146 1147const Item * 1148Package::getItem(int32_t idx) const { 1149 if (0 <= idx && idx < itemCount) { 1150 return &items[idx]; 1151 } 1152 return NULL; 1153} 1154 1155void 1156Package::checkDependency(void *context, const char *itemName, const char *targetName) { 1157 // check dependency: make sure the target item is in the package 1158 Package *me=(Package *)context; 1159 if(me->findItem(targetName)<0) { 1160 me->isMissingItems=TRUE; 1161 fprintf(stderr, "Item %s depends on missing item %s\n", itemName, targetName); 1162 } 1163} 1164 1165UBool 1166Package::checkDependencies() { 1167 isMissingItems=FALSE; 1168 enumDependencies(this, checkDependency); 1169 return (UBool)!isMissingItems; 1170} 1171 1172void 1173Package::enumDependencies(void *context, CheckDependency check) { 1174 int32_t i; 1175 1176 for(i=0; i<itemCount; ++i) { 1177 enumDependencies(items+i, context, check); 1178 } 1179} 1180 1181char * 1182Package::allocString(UBool in, int32_t length) { 1183 char *p; 1184 int32_t top; 1185 1186 if(in) { 1187 top=inStringTop; 1188 p=inStrings+top; 1189 } else { 1190 top=outStringTop; 1191 p=outStrings+top; 1192 } 1193 top+=length+1; 1194 1195 if(top>STRING_STORE_SIZE) { 1196 fprintf(stderr, "icupkg: string storage overflow\n"); 1197 exit(U_BUFFER_OVERFLOW_ERROR); 1198 } 1199 if(in) { 1200 inStringTop=top; 1201 } else { 1202 outStringTop=top; 1203 } 1204 return p; 1205} 1206 1207void 1208Package::sortItems() { 1209 UErrorCode errorCode=U_ZERO_ERROR; 1210 uprv_sortArray(items, itemCount, (int32_t)sizeof(Item), compareItems, NULL, FALSE, &errorCode); 1211 if(U_FAILURE(errorCode)) { 1212 fprintf(stderr, "icupkg: sorting item names failed - %s\n", u_errorName(errorCode)); 1213 exit(errorCode); 1214 } 1215} 1216 1217U_NAMESPACE_END 1218