1/* 2******************************************************************************* 3* 4* Copyright (C) 1999-2015, International Business Machines 5* Corporation and others. All Rights Reserved. 6* 7******************************************************************************* 8* file name: package.cpp 9* encoding: US-ASCII 10* tab size: 8 (not used) 11* indentation:4 12* 13* created on: 2005aug25 14* created by: Markus W. Scherer 15* 16* Read, modify, and write ICU .dat data package files. 17* This is an integral part of the icupkg tool, moved to the toolutil library 18* because parts of tool implementations tend to be later shared by 19* other tools. 20* Subsumes functionality and implementation code from 21* gencmn, decmn, and icuswap tools. 22*/ 23 24#include "unicode/utypes.h" 25#include "unicode/putil.h" 26#include "unicode/udata.h" 27#include "cstring.h" 28#include "uarrsort.h" 29#include "ucmndata.h" 30#include "udataswp.h" 31#include "swapimpl.h" 32#include "toolutil.h" 33#include "package.h" 34#include "cmemory.h" 35 36#include <stdio.h> 37#include <stdlib.h> 38#include <string.h> 39 40 41static const int32_t kItemsChunk = 256; /* How much to increase the filesarray by each time */ 42 43// general definitions ----------------------------------------------------- *** 44 45/* UDataInfo cf. udata.h */ 46static const UDataInfo dataInfo={ 47 (uint16_t)sizeof(UDataInfo), 48 0, 49 50 U_IS_BIG_ENDIAN, 51 U_CHARSET_FAMILY, 52 (uint8_t)sizeof(UChar), 53 0, 54 55 {0x43, 0x6d, 0x6e, 0x44}, /* dataFormat="CmnD" */ 56 {1, 0, 0, 0}, /* formatVersion */ 57 {3, 0, 0, 0} /* dataVersion */ 58}; 59 60U_CDECL_BEGIN 61static void U_CALLCONV 62printPackageError(void *context, const char *fmt, va_list args) { 63 vfprintf((FILE *)context, fmt, args); 64} 65U_CDECL_END 66 67static uint16_t 68readSwapUInt16(uint16_t x) { 69 return (uint16_t)((x<<8)|(x>>8)); 70} 71 72// platform types ---------------------------------------------------------- *** 73 74static const char *types="lb?e"; 75 76enum { TYPE_L, TYPE_B, TYPE_LE, TYPE_E, TYPE_COUNT }; 77 78static inline int32_t 79makeTypeEnum(uint8_t charset, UBool isBigEndian) { 80 return 2*(int32_t)charset+isBigEndian; 81} 82 83static inline int32_t 84makeTypeEnum(char type) { 85 return 86 type == 'l' ? TYPE_L : 87 type == 'b' ? TYPE_B : 88 type == 'e' ? TYPE_E : 89 -1; 90} 91 92static inline char 93makeTypeLetter(uint8_t charset, UBool isBigEndian) { 94 return types[makeTypeEnum(charset, isBigEndian)]; 95} 96 97static inline char 98makeTypeLetter(int32_t typeEnum) { 99 return types[typeEnum]; 100} 101 102static void 103makeTypeProps(char type, uint8_t &charset, UBool &isBigEndian) { 104 int32_t typeEnum=makeTypeEnum(type); 105 charset=(uint8_t)(typeEnum>>1); 106 isBigEndian=(UBool)(typeEnum&1); 107} 108 109U_CFUNC const UDataInfo * 110getDataInfo(const uint8_t *data, int32_t length, 111 int32_t &infoLength, int32_t &headerLength, 112 UErrorCode *pErrorCode) { 113 const DataHeader *pHeader; 114 const UDataInfo *pInfo; 115 116 if(pErrorCode==NULL || U_FAILURE(*pErrorCode)) { 117 return NULL; 118 } 119 if( data==NULL || 120 (length>=0 && length<(int32_t)sizeof(DataHeader)) 121 ) { 122 *pErrorCode=U_ILLEGAL_ARGUMENT_ERROR; 123 return NULL; 124 } 125 126 pHeader=(const DataHeader *)data; 127 pInfo=&pHeader->info; 128 if( (length>=0 && length<(int32_t)sizeof(DataHeader)) || 129 pHeader->dataHeader.magic1!=0xda || 130 pHeader->dataHeader.magic2!=0x27 || 131 pInfo->sizeofUChar!=2 132 ) { 133 *pErrorCode=U_UNSUPPORTED_ERROR; 134 return NULL; 135 } 136 137 if(pInfo->isBigEndian==U_IS_BIG_ENDIAN) { 138 headerLength=pHeader->dataHeader.headerSize; 139 infoLength=pInfo->size; 140 } else { 141 headerLength=readSwapUInt16(pHeader->dataHeader.headerSize); 142 infoLength=readSwapUInt16(pInfo->size); 143 } 144 145 if( headerLength<(int32_t)sizeof(DataHeader) || 146 infoLength<(int32_t)sizeof(UDataInfo) || 147 headerLength<(int32_t)(sizeof(pHeader->dataHeader)+infoLength) || 148 (length>=0 && length<headerLength) 149 ) { 150 *pErrorCode=U_UNSUPPORTED_ERROR; 151 return NULL; 152 } 153 154 return pInfo; 155} 156 157static int32_t 158getTypeEnumForInputData(const uint8_t *data, int32_t length, 159 UErrorCode *pErrorCode) { 160 const UDataInfo *pInfo; 161 int32_t infoLength, headerLength; 162 163 /* getDataInfo() checks for illegal arguments */ 164 pInfo=getDataInfo(data, length, infoLength, headerLength, pErrorCode); 165 if(pInfo==NULL) { 166 return -1; 167 } 168 169 return makeTypeEnum(pInfo->charsetFamily, (UBool)pInfo->isBigEndian); 170} 171 172// file handling ----------------------------------------------------------- *** 173 174static void 175extractPackageName(const char *filename, 176 char pkg[], int32_t capacity) { 177 const char *basename; 178 int32_t len; 179 180 basename=findBasename(filename); 181 len=(int32_t)strlen(basename)-4; /* -4: subtract the length of ".dat" */ 182 183 if(len<=0 || 0!=strcmp(basename+len, ".dat")) { 184 fprintf(stderr, "icupkg: \"%s\" is not recognized as a package filename (must end with .dat)\n", 185 basename); 186 exit(U_ILLEGAL_ARGUMENT_ERROR); 187 } 188 189 if(len>=capacity) { 190 fprintf(stderr, "icupkg: the package name \"%s\" is too long (>=%ld)\n", 191 basename, (long)capacity); 192 exit(U_ILLEGAL_ARGUMENT_ERROR); 193 } 194 195 memcpy(pkg, basename, len); 196 pkg[len]=0; 197} 198 199static int32_t 200getFileLength(FILE *f) { 201 int32_t length; 202 203 fseek(f, 0, SEEK_END); 204 length=(int32_t)ftell(f); 205 fseek(f, 0, SEEK_SET); 206 return length; 207} 208 209/* 210 * Turn tree separators and alternate file separators into normal file separators. 211 */ 212#if U_TREE_ENTRY_SEP_CHAR==U_FILE_SEP_CHAR && U_FILE_ALT_SEP_CHAR==U_FILE_SEP_CHAR 213#define treeToPath(s) 214#else 215static void 216treeToPath(char *s) { 217 char *t; 218 219 for(t=s; *t!=0; ++t) { 220 if(*t==U_TREE_ENTRY_SEP_CHAR || *t==U_FILE_ALT_SEP_CHAR) { 221 *t=U_FILE_SEP_CHAR; 222 } 223 } 224} 225#endif 226 227/* 228 * Turn file separators into tree separators. 229 */ 230#if U_TREE_ENTRY_SEP_CHAR==U_FILE_SEP_CHAR && U_FILE_ALT_SEP_CHAR==U_FILE_SEP_CHAR 231#define pathToTree(s) 232#else 233static void 234pathToTree(char *s) { 235 char *t; 236 237 for(t=s; *t!=0; ++t) { 238 if(*t==U_FILE_SEP_CHAR || *t==U_FILE_ALT_SEP_CHAR) { 239 *t=U_TREE_ENTRY_SEP_CHAR; 240 } 241 } 242} 243#endif 244 245/* 246 * Prepend the path (if any) to the name and run the name through treeToName(). 247 */ 248static void 249makeFullFilename(const char *path, const char *name, 250 char *filename, int32_t capacity) { 251 char *s; 252 253 // prepend the path unless NULL or empty 254 if(path!=NULL && path[0]!=0) { 255 if((int32_t)(strlen(path)+1)>=capacity) { 256 fprintf(stderr, "pathname too long: \"%s\"\n", path); 257 exit(U_BUFFER_OVERFLOW_ERROR); 258 } 259 strcpy(filename, path); 260 261 // make sure the path ends with a file separator 262 s=strchr(filename, 0); 263 if(*(s-1)!=U_FILE_SEP_CHAR && *(s-1)!=U_FILE_ALT_SEP_CHAR) { 264 *s++=U_FILE_SEP_CHAR; 265 } 266 } else { 267 s=filename; 268 } 269 270 // turn the name into a filename, turn tree separators into file separators 271 if((int32_t)((s-filename)+strlen(name))>=capacity) { 272 fprintf(stderr, "path/filename too long: \"%s%s\"\n", filename, name); 273 exit(U_BUFFER_OVERFLOW_ERROR); 274 } 275 strcpy(s, name); 276 treeToPath(s); 277} 278 279static void 280makeFullFilenameAndDirs(const char *path, const char *name, 281 char *filename, int32_t capacity) { 282 char *sep; 283 UErrorCode errorCode; 284 285 makeFullFilename(path, name, filename, capacity); 286 287 // make tree directories 288 errorCode=U_ZERO_ERROR; 289 sep=strchr(filename, 0)-strlen(name); 290 while((sep=strchr(sep, U_FILE_SEP_CHAR))!=NULL) { 291 if(sep!=filename) { 292 *sep=0; // truncate temporarily 293 uprv_mkdir(filename, &errorCode); 294 if(U_FAILURE(errorCode)) { 295 fprintf(stderr, "icupkg: unable to create tree directory \"%s\"\n", filename); 296 exit(U_FILE_ACCESS_ERROR); 297 } 298 } 299 *sep++=U_FILE_SEP_CHAR; // restore file separator character 300 } 301} 302 303static uint8_t * 304readFile(const char *path, const char *name, int32_t &length, char &type) { 305 char filename[1024]; 306 FILE *file; 307 UErrorCode errorCode; 308 int32_t fileLength, typeEnum; 309 310 makeFullFilename(path, name, filename, (int32_t)sizeof(filename)); 311 312 /* open the input file, get its length, allocate memory for it, read the file */ 313 file=fopen(filename, "rb"); 314 if(file==NULL) { 315 fprintf(stderr, "icupkg: unable to open input file \"%s\"\n", filename); 316 exit(U_FILE_ACCESS_ERROR); 317 } 318 319 /* get the file length */ 320 fileLength=getFileLength(file); 321 if(ferror(file) || fileLength<=0) { 322 fprintf(stderr, "icupkg: empty input file \"%s\"\n", filename); 323 fclose(file); 324 exit(U_FILE_ACCESS_ERROR); 325 } 326 327 /* allocate the buffer, pad to multiple of 16 */ 328 length=(fileLength+0xf)&~0xf; 329 icu::LocalMemory<uint8_t> data((uint8_t *)uprv_malloc(length)); 330 if(data.isNull()) { 331 fclose(file); 332 fprintf(stderr, "icupkg: malloc error allocating %d bytes.\n", (int)length); 333 exit(U_MEMORY_ALLOCATION_ERROR); 334 } 335 336 /* read the file */ 337 if(fileLength!=(int32_t)fread(data.getAlias(), 1, fileLength, file)) { 338 fprintf(stderr, "icupkg: error reading \"%s\"\n", filename); 339 fclose(file); 340 exit(U_FILE_ACCESS_ERROR); 341 } 342 343 /* pad the file to a multiple of 16 using the usual padding byte */ 344 if(fileLength<length) { 345 memset(data.getAlias()+fileLength, 0xaa, length-fileLength); 346 } 347 348 fclose(file); 349 350 // minimum check for ICU-format data 351 errorCode=U_ZERO_ERROR; 352 typeEnum=getTypeEnumForInputData(data.getAlias(), length, &errorCode); 353 if(typeEnum<0 || U_FAILURE(errorCode)) { 354 fprintf(stderr, "icupkg: not an ICU data file: \"%s\"\n", filename); 355#if !UCONFIG_NO_LEGACY_CONVERSION 356 exit(U_INVALID_FORMAT_ERROR); 357#else 358 fprintf(stderr, "U_INVALID_FORMAT_ERROR occurred but UCONFIG_NO_LEGACY_CONVERSION is on so this is expected.\n"); 359 exit(0); 360#endif 361 } 362 type=makeTypeLetter(typeEnum); 363 364 return data.orphan(); 365} 366 367// .dat package file representation ---------------------------------------- *** 368 369U_CDECL_BEGIN 370 371static int32_t U_CALLCONV 372compareItems(const void * /*context*/, const void *left, const void *right) { 373 U_NAMESPACE_USE 374 375 return (int32_t)strcmp(((Item *)left)->name, ((Item *)right)->name); 376} 377 378U_CDECL_END 379 380U_NAMESPACE_BEGIN 381 382Package::Package() 383 : doAutoPrefix(FALSE), prefixEndsWithType(FALSE) { 384 inPkgName[0]=0; 385 pkgPrefix[0]=0; 386 inData=NULL; 387 inLength=0; 388 inCharset=U_CHARSET_FAMILY; 389 inIsBigEndian=U_IS_BIG_ENDIAN; 390 391 itemCount=0; 392 itemMax=0; 393 items=NULL; 394 395 inStringTop=outStringTop=0; 396 397 matchMode=0; 398 findPrefix=findSuffix=NULL; 399 findPrefixLength=findSuffixLength=0; 400 findNextIndex=-1; 401 402 // create a header for an empty package 403 DataHeader *pHeader; 404 pHeader=(DataHeader *)header; 405 pHeader->dataHeader.magic1=0xda; 406 pHeader->dataHeader.magic2=0x27; 407 memcpy(&pHeader->info, &dataInfo, sizeof(dataInfo)); 408 headerLength=(int32_t)(4+sizeof(dataInfo)); 409 if(headerLength&0xf) { 410 /* NUL-pad the header to a multiple of 16 */ 411 int32_t length=(headerLength+0xf)&~0xf; 412 memset(header+headerLength, 0, length-headerLength); 413 headerLength=length; 414 } 415 pHeader->dataHeader.headerSize=(uint16_t)headerLength; 416} 417 418Package::~Package() { 419 int32_t idx; 420 421 uprv_free(inData); 422 423 for(idx=0; idx<itemCount; ++idx) { 424 if(items[idx].isDataOwned) { 425 uprv_free(items[idx].data); 426 } 427 } 428 429 uprv_free((void*)items); 430} 431 432void 433Package::setPrefix(const char *p) { 434 if(strlen(p)>=sizeof(pkgPrefix)) { 435 fprintf(stderr, "icupkg: --toc_prefix %s too long\n", p); 436 exit(U_ILLEGAL_ARGUMENT_ERROR); 437 } 438 strcpy(pkgPrefix, p); 439} 440 441void 442Package::readPackage(const char *filename) { 443 UDataSwapper *ds; 444 const UDataInfo *pInfo; 445 UErrorCode errorCode; 446 447 const uint8_t *inBytes; 448 449 int32_t length, offset, i; 450 int32_t itemLength, typeEnum; 451 char type; 452 453 const UDataOffsetTOCEntry *inEntries; 454 455 extractPackageName(filename, inPkgName, (int32_t)sizeof(inPkgName)); 456 457 /* read the file */ 458 inData=readFile(NULL, filename, inLength, type); 459 length=inLength; 460 461 /* 462 * swap the header - even if the swapping itself is a no-op 463 * because it tells us the header length 464 */ 465 errorCode=U_ZERO_ERROR; 466 makeTypeProps(type, inCharset, inIsBigEndian); 467 ds=udata_openSwapper(inIsBigEndian, inCharset, U_IS_BIG_ENDIAN, U_CHARSET_FAMILY, &errorCode); 468 if(U_FAILURE(errorCode)) { 469 fprintf(stderr, "icupkg: udata_openSwapper(\"%s\") failed - %s\n", 470 filename, u_errorName(errorCode)); 471 exit(errorCode); 472 } 473 474 ds->printError=printPackageError; 475 ds->printErrorContext=stderr; 476 477 headerLength=sizeof(header); 478 if(length<headerLength) { 479 headerLength=length; 480 } 481 headerLength=udata_swapDataHeader(ds, inData, headerLength, header, &errorCode); 482 if(U_FAILURE(errorCode)) { 483 exit(errorCode); 484 } 485 486 /* check data format and format version */ 487 pInfo=(const UDataInfo *)((const char *)inData+4); 488 if(!( 489 pInfo->dataFormat[0]==0x43 && /* dataFormat="CmnD" */ 490 pInfo->dataFormat[1]==0x6d && 491 pInfo->dataFormat[2]==0x6e && 492 pInfo->dataFormat[3]==0x44 && 493 pInfo->formatVersion[0]==1 494 )) { 495 fprintf(stderr, "icupkg: data format %02x.%02x.%02x.%02x (format version %02x) is not recognized as an ICU .dat package\n", 496 pInfo->dataFormat[0], pInfo->dataFormat[1], 497 pInfo->dataFormat[2], pInfo->dataFormat[3], 498 pInfo->formatVersion[0]); 499 exit(U_UNSUPPORTED_ERROR); 500 } 501 inIsBigEndian=(UBool)pInfo->isBigEndian; 502 inCharset=pInfo->charsetFamily; 503 504 inBytes=(const uint8_t *)inData+headerLength; 505 inEntries=(const UDataOffsetTOCEntry *)(inBytes+4); 506 507 /* check that the itemCount fits, then the ToC table, then at least the header of the last item */ 508 length-=headerLength; 509 if(length<4) { 510 /* itemCount does not fit */ 511 offset=0x7fffffff; 512 } else { 513 itemCount=udata_readInt32(ds, *(const int32_t *)inBytes); 514 setItemCapacity(itemCount); /* resize so there's space */ 515 if(itemCount==0) { 516 offset=4; 517 } else if(length<(4+8*itemCount)) { 518 /* ToC table does not fit */ 519 offset=0x7fffffff; 520 } else { 521 /* offset of the last item plus at least 20 bytes for its header */ 522 offset=20+(int32_t)ds->readUInt32(inEntries[itemCount-1].dataOffset); 523 } 524 } 525 if(length<offset) { 526 fprintf(stderr, "icupkg: too few bytes (%ld after header) for a .dat package\n", 527 (long)length); 528 exit(U_INDEX_OUTOFBOUNDS_ERROR); 529 } 530 /* do not modify the package length variable until the last item's length is set */ 531 532 if(itemCount<=0) { 533 if(doAutoPrefix) { 534 fprintf(stderr, "icupkg: --auto_toc_prefix[_with_type] but the input package is empty\n"); 535 exit(U_INVALID_FORMAT_ERROR); 536 } 537 } else { 538 char prefix[MAX_PKG_NAME_LENGTH+4]; 539 char *s, *inItemStrings; 540 541 if(itemCount>itemMax) { 542 fprintf(stderr, "icupkg: too many items, maximum is %d\n", itemMax); 543 exit(U_BUFFER_OVERFLOW_ERROR); 544 } 545 546 /* swap the item name strings */ 547 int32_t stringsOffset=4+8*itemCount; 548 itemLength=(int32_t)(ds->readUInt32(inEntries[0].dataOffset))-stringsOffset; 549 550 // don't include padding bytes at the end of the item names 551 while(itemLength>0 && inBytes[stringsOffset+itemLength-1]!=0) { 552 --itemLength; 553 } 554 555 if((inStringTop+itemLength)>STRING_STORE_SIZE) { 556 fprintf(stderr, "icupkg: total length of item name strings too long\n"); 557 exit(U_BUFFER_OVERFLOW_ERROR); 558 } 559 560 inItemStrings=inStrings+inStringTop; 561 ds->swapInvChars(ds, inBytes+stringsOffset, itemLength, inItemStrings, &errorCode); 562 if(U_FAILURE(errorCode)) { 563 fprintf(stderr, "icupkg failed to swap the input .dat package item name strings\n"); 564 exit(U_INVALID_FORMAT_ERROR); 565 } 566 inStringTop+=itemLength; 567 568 // reset the Item entries 569 memset(items, 0, itemCount*sizeof(Item)); 570 571 /* 572 * Get the common prefix of the items. 573 * New-style ICU .dat packages use tree separators ('/') between package names, 574 * tree names, and item names, 575 * while old-style ICU .dat packages (before multi-tree support) 576 * use an underscore ('_') between package and item names. 577 */ 578 offset=(int32_t)ds->readUInt32(inEntries[0].nameOffset)-stringsOffset; 579 s=inItemStrings+offset; // name of the first entry 580 int32_t prefixLength; 581 if(doAutoPrefix) { 582 // Use the first entry's prefix. Must be a new-style package. 583 const char *prefixLimit=strchr(s, U_TREE_ENTRY_SEP_CHAR); 584 if(prefixLimit==NULL) { 585 fprintf(stderr, 586 "icupkg: --auto_toc_prefix[_with_type] but " 587 "the first entry \"%s\" does not contain a '%c'\n", 588 s, U_TREE_ENTRY_SEP_CHAR); 589 exit(U_INVALID_FORMAT_ERROR); 590 } 591 prefixLength=(int32_t)(prefixLimit-s); 592 if(prefixLength==0 || prefixLength>=UPRV_LENGTHOF(pkgPrefix)) { 593 fprintf(stderr, 594 "icupkg: --auto_toc_prefix[_with_type] but " 595 "the prefix of the first entry \"%s\" is empty or too long\n", 596 s); 597 exit(U_INVALID_FORMAT_ERROR); 598 } 599 if(prefixEndsWithType && s[prefixLength-1]!=type) { 600 fprintf(stderr, 601 "icupkg: --auto_toc_prefix_with_type but " 602 "the prefix of the first entry \"%s\" does not end with '%c'\n", 603 s, type); 604 exit(U_INVALID_FORMAT_ERROR); 605 } 606 memcpy(pkgPrefix, s, prefixLength); 607 pkgPrefix[prefixLength]=0; 608 memcpy(prefix, s, ++prefixLength); // include the / 609 } else { 610 // Use the package basename as prefix. 611 int32_t inPkgNameLength=strlen(inPkgName); 612 memcpy(prefix, inPkgName, inPkgNameLength); 613 prefixLength=inPkgNameLength; 614 615 if( (int32_t)strlen(s)>=(inPkgNameLength+2) && 616 0==memcmp(s, inPkgName, inPkgNameLength) && 617 s[inPkgNameLength]=='_' 618 ) { 619 // old-style .dat package 620 prefix[prefixLength++]='_'; 621 } else { 622 // new-style .dat package 623 prefix[prefixLength++]=U_TREE_ENTRY_SEP_CHAR; 624 // if it turns out to not contain U_TREE_ENTRY_SEP_CHAR 625 // then the test in the loop below will fail 626 } 627 } 628 prefix[prefixLength]=0; 629 630 /* read the ToC table */ 631 for(i=0; i<itemCount; ++i) { 632 // skip the package part of the item name, error if it does not match the actual package name 633 // or if nothing follows the package name 634 offset=(int32_t)ds->readUInt32(inEntries[i].nameOffset)-stringsOffset; 635 s=inItemStrings+offset; 636 if(0!=strncmp(s, prefix, prefixLength) || s[prefixLength]==0) { 637 fprintf(stderr, "icupkg: input .dat item name \"%s\" does not start with \"%s\"\n", 638 s, prefix); 639 exit(U_INVALID_FORMAT_ERROR); 640 } 641 items[i].name=s+prefixLength; 642 643 // set the item's data 644 items[i].data=(uint8_t *)inBytes+ds->readUInt32(inEntries[i].dataOffset); 645 if(i>0) { 646 items[i-1].length=(int32_t)(items[i].data-items[i-1].data); 647 648 // set the previous item's platform type 649 typeEnum=getTypeEnumForInputData(items[i-1].data, items[i-1].length, &errorCode); 650 if(typeEnum<0 || U_FAILURE(errorCode)) { 651 fprintf(stderr, "icupkg: not an ICU data file: item \"%s\" in \"%s\"\n", items[i-1].name, filename); 652 exit(U_INVALID_FORMAT_ERROR); 653 } 654 items[i-1].type=makeTypeLetter(typeEnum); 655 } 656 items[i].isDataOwned=FALSE; 657 } 658 // set the last item's length 659 items[itemCount-1].length=length-ds->readUInt32(inEntries[itemCount-1].dataOffset); 660 661 // set the last item's platform type 662 typeEnum=getTypeEnumForInputData(items[itemCount-1].data, items[itemCount-1].length, &errorCode); 663 if(typeEnum<0 || U_FAILURE(errorCode)) { 664 fprintf(stderr, "icupkg: not an ICU data file: item \"%s\" in \"%s\"\n", items[i-1].name, filename); 665 exit(U_INVALID_FORMAT_ERROR); 666 } 667 items[itemCount-1].type=makeTypeLetter(typeEnum); 668 669 if(type!=U_ICUDATA_TYPE_LETTER[0]) { 670 // sort the item names for the local charset 671 sortItems(); 672 } 673 } 674 675 udata_closeSwapper(ds); 676} 677 678char 679Package::getInType() { 680 return makeTypeLetter(inCharset, inIsBigEndian); 681} 682 683void 684Package::writePackage(const char *filename, char outType, const char *comment) { 685 char prefix[MAX_PKG_NAME_LENGTH+4]; 686 UDataOffsetTOCEntry entry; 687 UDataSwapper *dsLocalToOut, *ds[TYPE_COUNT]; 688 FILE *file; 689 Item *pItem; 690 char *name; 691 UErrorCode errorCode; 692 int32_t i, length, prefixLength, maxItemLength, basenameOffset, offset, outInt32; 693 uint8_t outCharset; 694 UBool outIsBigEndian; 695 696 extractPackageName(filename, prefix, MAX_PKG_NAME_LENGTH); 697 698 // if there is an explicit comment, then use it, else use what's in the current header 699 if(comment!=NULL) { 700 /* get the header size minus the current comment */ 701 DataHeader *pHeader; 702 int32_t length; 703 704 pHeader=(DataHeader *)header; 705 headerLength=4+pHeader->info.size; 706 length=(int32_t)strlen(comment); 707 if((int32_t)(headerLength+length)>=(int32_t)sizeof(header)) { 708 fprintf(stderr, "icupkg: comment too long\n"); 709 exit(U_BUFFER_OVERFLOW_ERROR); 710 } 711 memcpy(header+headerLength, comment, length+1); 712 headerLength+=length; 713 if(headerLength&0xf) { 714 /* NUL-pad the header to a multiple of 16 */ 715 length=(headerLength+0xf)&~0xf; 716 memset(header+headerLength, 0, length-headerLength); 717 headerLength=length; 718 } 719 pHeader->dataHeader.headerSize=(uint16_t)headerLength; 720 } 721 722 makeTypeProps(outType, outCharset, outIsBigEndian); 723 724 // open (TYPE_COUNT-2) swappers 725 // one is a no-op for local type==outType 726 // one type (TYPE_LE) is bogus 727 errorCode=U_ZERO_ERROR; 728 i=makeTypeEnum(outType); 729 ds[TYPE_B]= i==TYPE_B ? NULL : udata_openSwapper(TRUE, U_ASCII_FAMILY, outIsBigEndian, outCharset, &errorCode); 730 ds[TYPE_L]= i==TYPE_L ? NULL : udata_openSwapper(FALSE, U_ASCII_FAMILY, outIsBigEndian, outCharset, &errorCode); 731 ds[TYPE_LE]=NULL; 732 ds[TYPE_E]= i==TYPE_E ? NULL : udata_openSwapper(TRUE, U_EBCDIC_FAMILY, outIsBigEndian, outCharset, &errorCode); 733 if(U_FAILURE(errorCode)) { 734 fprintf(stderr, "icupkg: udata_openSwapper() failed - %s\n", u_errorName(errorCode)); 735 exit(errorCode); 736 } 737 for(i=0; i<TYPE_COUNT; ++i) { 738 if(ds[i]!=NULL) { 739 ds[i]->printError=printPackageError; 740 ds[i]->printErrorContext=stderr; 741 } 742 } 743 744 dsLocalToOut=ds[makeTypeEnum(U_CHARSET_FAMILY, U_IS_BIG_ENDIAN)]; 745 746 // create the file and write its contents 747 file=fopen(filename, "wb"); 748 if(file==NULL) { 749 fprintf(stderr, "icupkg: unable to create file \"%s\"\n", filename); 750 exit(U_FILE_ACCESS_ERROR); 751 } 752 753 // swap and write the header 754 if(dsLocalToOut!=NULL) { 755 udata_swapDataHeader(dsLocalToOut, header, headerLength, header, &errorCode); 756 if(U_FAILURE(errorCode)) { 757 fprintf(stderr, "icupkg: udata_swapDataHeader(local to out) failed - %s\n", u_errorName(errorCode)); 758 exit(errorCode); 759 } 760 } 761 length=(int32_t)fwrite(header, 1, headerLength, file); 762 if(length!=headerLength) { 763 fprintf(stderr, "icupkg: unable to write complete header to file \"%s\"\n", filename); 764 exit(U_FILE_ACCESS_ERROR); 765 } 766 767 // prepare and swap the package name with a tree separator 768 // for prepending to item names 769 if(pkgPrefix[0]==0) { 770 prefixLength=(int32_t)strlen(prefix); 771 } else { 772 prefixLength=(int32_t)strlen(pkgPrefix); 773 memcpy(prefix, pkgPrefix, prefixLength); 774 if(prefixEndsWithType) { 775 prefix[prefixLength-1]=outType; 776 } 777 } 778 prefix[prefixLength++]=U_TREE_ENTRY_SEP_CHAR; 779 prefix[prefixLength]=0; 780 if(dsLocalToOut!=NULL) { 781 dsLocalToOut->swapInvChars(dsLocalToOut, prefix, prefixLength, prefix, &errorCode); 782 if(U_FAILURE(errorCode)) { 783 fprintf(stderr, "icupkg: swapInvChars(output package name) failed - %s\n", u_errorName(errorCode)); 784 exit(errorCode); 785 } 786 787 // swap and sort the item names (sorting needs to be done in the output charset) 788 dsLocalToOut->swapInvChars(dsLocalToOut, inStrings, inStringTop, inStrings, &errorCode); 789 if(U_FAILURE(errorCode)) { 790 fprintf(stderr, "icupkg: swapInvChars(item names) failed - %s\n", u_errorName(errorCode)); 791 exit(errorCode); 792 } 793 sortItems(); 794 } 795 796 // create the output item names in sorted order, with the package name prepended to each 797 for(i=0; i<itemCount; ++i) { 798 length=(int32_t)strlen(items[i].name); 799 name=allocString(FALSE, length+prefixLength); 800 memcpy(name, prefix, prefixLength); 801 memcpy(name+prefixLength, items[i].name, length+1); 802 items[i].name=name; 803 } 804 805 // calculate offsets for item names and items, pad to 16-align items 806 // align only the first item; each item's length is a multiple of 16 807 basenameOffset=4+8*itemCount; 808 offset=basenameOffset+outStringTop; 809 if((length=(offset&15))!=0) { 810 length=16-length; 811 memset(allocString(FALSE, length-1), 0xaa, length); 812 offset+=length; 813 } 814 815 // write the table of contents 816 // first the itemCount 817 outInt32=itemCount; 818 if(dsLocalToOut!=NULL) { 819 dsLocalToOut->swapArray32(dsLocalToOut, &outInt32, 4, &outInt32, &errorCode); 820 if(U_FAILURE(errorCode)) { 821 fprintf(stderr, "icupkg: swapArray32(item count) failed - %s\n", u_errorName(errorCode)); 822 exit(errorCode); 823 } 824 } 825 length=(int32_t)fwrite(&outInt32, 1, 4, file); 826 if(length!=4) { 827 fprintf(stderr, "icupkg: unable to write complete item count to file \"%s\"\n", filename); 828 exit(U_FILE_ACCESS_ERROR); 829 } 830 831 // then write the item entries (and collect the maxItemLength) 832 maxItemLength=0; 833 for(i=0; i<itemCount; ++i) { 834 entry.nameOffset=(uint32_t)(basenameOffset+(items[i].name-outStrings)); 835 entry.dataOffset=(uint32_t)offset; 836 if(dsLocalToOut!=NULL) { 837 dsLocalToOut->swapArray32(dsLocalToOut, &entry, 8, &entry, &errorCode); 838 if(U_FAILURE(errorCode)) { 839 fprintf(stderr, "icupkg: swapArray32(item entry %ld) failed - %s\n", (long)i, u_errorName(errorCode)); 840 exit(errorCode); 841 } 842 } 843 length=(int32_t)fwrite(&entry, 1, 8, file); 844 if(length!=8) { 845 fprintf(stderr, "icupkg: unable to write complete item entry %ld to file \"%s\"\n", (long)i, filename); 846 exit(U_FILE_ACCESS_ERROR); 847 } 848 849 length=items[i].length; 850 if(length>maxItemLength) { 851 maxItemLength=length; 852 } 853 offset+=length; 854 } 855 856 // write the item names 857 length=(int32_t)fwrite(outStrings, 1, outStringTop, file); 858 if(length!=outStringTop) { 859 fprintf(stderr, "icupkg: unable to write complete item names to file \"%s\"\n", filename); 860 exit(U_FILE_ACCESS_ERROR); 861 } 862 863 // write the items 864 for(pItem=items, i=0; i<itemCount; ++pItem, ++i) { 865 int32_t type=makeTypeEnum(pItem->type); 866 if(ds[type]!=NULL) { 867 // swap each item from its platform properties to the desired ones 868 udata_swap( 869 ds[type], 870 pItem->data, pItem->length, pItem->data, 871 &errorCode); 872 if(U_FAILURE(errorCode)) { 873 fprintf(stderr, "icupkg: udata_swap(item %ld) failed - %s\n", (long)i, u_errorName(errorCode)); 874 exit(errorCode); 875 } 876 } 877 length=(int32_t)fwrite(pItem->data, 1, pItem->length, file); 878 if(length!=pItem->length) { 879 fprintf(stderr, "icupkg: unable to write complete item %ld to file \"%s\"\n", (long)i, filename); 880 exit(U_FILE_ACCESS_ERROR); 881 } 882 } 883 884 if(ferror(file)) { 885 fprintf(stderr, "icupkg: unable to write complete file \"%s\"\n", filename); 886 exit(U_FILE_ACCESS_ERROR); 887 } 888 889 fclose(file); 890 for(i=0; i<TYPE_COUNT; ++i) { 891 udata_closeSwapper(ds[i]); 892 } 893} 894 895int32_t 896Package::findItem(const char *name, int32_t length) const { 897 int32_t i, start, limit; 898 int result; 899 900 /* do a binary search for the string */ 901 start=0; 902 limit=itemCount; 903 while(start<limit) { 904 i=(start+limit)/2; 905 if(length>=0) { 906 result=strncmp(name, items[i].name, length); 907 } else { 908 result=strcmp(name, items[i].name); 909 } 910 911 if(result==0) { 912 /* found */ 913 if(length>=0) { 914 /* 915 * if we compared just prefixes, then we may need to back up 916 * to the first item with this prefix 917 */ 918 while(i>0 && 0==strncmp(name, items[i-1].name, length)) { 919 --i; 920 } 921 } 922 return i; 923 } else if(result<0) { 924 limit=i; 925 } else /* result>0 */ { 926 start=i+1; 927 } 928 } 929 930 return ~start; /* not found, return binary-not of the insertion point */ 931} 932 933void 934Package::findItems(const char *pattern) { 935 const char *wild; 936 937 if(pattern==NULL || *pattern==0) { 938 findNextIndex=-1; 939 return; 940 } 941 942 findPrefix=pattern; 943 findSuffix=NULL; 944 findSuffixLength=0; 945 946 wild=strchr(pattern, '*'); 947 if(wild==NULL) { 948 // no wildcard 949 findPrefixLength=(int32_t)strlen(pattern); 950 } else { 951 // one wildcard 952 findPrefixLength=(int32_t)(wild-pattern); 953 findSuffix=wild+1; 954 findSuffixLength=(int32_t)strlen(findSuffix); 955 if(NULL!=strchr(findSuffix, '*')) { 956 // two or more wildcards 957 fprintf(stderr, "icupkg: syntax error (more than one '*') in item pattern \"%s\"\n", pattern); 958 exit(U_PARSE_ERROR); 959 } 960 } 961 962 if(findPrefixLength==0) { 963 findNextIndex=0; 964 } else { 965 findNextIndex=findItem(findPrefix, findPrefixLength); 966 } 967} 968 969int32_t 970Package::findNextItem() { 971 const char *name, *middle, *treeSep; 972 int32_t idx, nameLength, middleLength; 973 974 if(findNextIndex<0) { 975 return -1; 976 } 977 978 while(findNextIndex<itemCount) { 979 idx=findNextIndex++; 980 name=items[idx].name; 981 nameLength=(int32_t)strlen(name); 982 if(nameLength<(findPrefixLength+findSuffixLength)) { 983 // item name too short for prefix & suffix 984 continue; 985 } 986 if(findPrefixLength>0 && 0!=memcmp(findPrefix, name, findPrefixLength)) { 987 // left the range of names with this prefix 988 break; 989 } 990 middle=name+findPrefixLength; 991 middleLength=nameLength-findPrefixLength-findSuffixLength; 992 if(findSuffixLength>0 && 0!=memcmp(findSuffix, name+(nameLength-findSuffixLength), findSuffixLength)) { 993 // suffix does not match 994 continue; 995 } 996 // prefix & suffix match 997 998 if(matchMode&MATCH_NOSLASH) { 999 treeSep=strchr(middle, U_TREE_ENTRY_SEP_CHAR); 1000 if(treeSep!=NULL && (treeSep-middle)<middleLength) { 1001 // the middle (matching the * wildcard) contains a tree separator / 1002 continue; 1003 } 1004 } 1005 1006 // found a matching item 1007 return idx; 1008 } 1009 1010 // no more items 1011 findNextIndex=-1; 1012 return -1; 1013} 1014 1015void 1016Package::setMatchMode(uint32_t mode) { 1017 matchMode=mode; 1018} 1019 1020void 1021Package::addItem(const char *name) { 1022 addItem(name, NULL, 0, FALSE, U_ICUDATA_TYPE_LETTER[0]); 1023} 1024 1025void 1026Package::addItem(const char *name, uint8_t *data, int32_t length, UBool isDataOwned, char type) { 1027 int32_t idx; 1028 1029 idx=findItem(name); 1030 if(idx<0) { 1031 // new item, make space at the insertion point 1032 ensureItemCapacity(); 1033 // move the following items down 1034 idx=~idx; 1035 if(idx<itemCount) { 1036 memmove(items+idx+1, items+idx, (itemCount-idx)*sizeof(Item)); 1037 } 1038 ++itemCount; 1039 1040 // reset this Item entry 1041 memset(items+idx, 0, sizeof(Item)); 1042 1043 // copy the item's name 1044 items[idx].name=allocString(TRUE, strlen(name)); 1045 strcpy(items[idx].name, name); 1046 pathToTree(items[idx].name); 1047 } else { 1048 // same-name item found, replace it 1049 if(items[idx].isDataOwned) { 1050 uprv_free(items[idx].data); 1051 } 1052 1053 // keep the item's name since it is the same 1054 } 1055 1056 // set the item's data 1057 items[idx].data=data; 1058 items[idx].length=length; 1059 items[idx].isDataOwned=isDataOwned; 1060 items[idx].type=type; 1061} 1062 1063void 1064Package::addFile(const char *filesPath, const char *name) { 1065 uint8_t *data; 1066 int32_t length; 1067 char type; 1068 1069 data=readFile(filesPath, name, length, type); 1070 // readFile() exits the tool if it fails 1071 addItem(name, data, length, TRUE, type); 1072} 1073 1074void 1075Package::addItems(const Package &listPkg) { 1076 const Item *pItem; 1077 int32_t i; 1078 1079 for(pItem=listPkg.items, i=0; i<listPkg.itemCount; ++pItem, ++i) { 1080 addItem(pItem->name, pItem->data, pItem->length, FALSE, pItem->type); 1081 } 1082} 1083 1084void 1085Package::removeItem(int32_t idx) { 1086 if(idx>=0) { 1087 // remove the item 1088 if(items[idx].isDataOwned) { 1089 uprv_free(items[idx].data); 1090 } 1091 1092 // move the following items up 1093 if((idx+1)<itemCount) { 1094 memmove(items+idx, items+idx+1, (itemCount-(idx+1))*sizeof(Item)); 1095 } 1096 --itemCount; 1097 1098 if(idx<=findNextIndex) { 1099 --findNextIndex; 1100 } 1101 } 1102} 1103 1104void 1105Package::removeItems(const char *pattern) { 1106 int32_t idx; 1107 1108 findItems(pattern); 1109 while((idx=findNextItem())>=0) { 1110 removeItem(idx); 1111 } 1112} 1113 1114void 1115Package::removeItems(const Package &listPkg) { 1116 const Item *pItem; 1117 int32_t i; 1118 1119 for(pItem=listPkg.items, i=0; i<listPkg.itemCount; ++pItem, ++i) { 1120 removeItems(pItem->name); 1121 } 1122} 1123 1124void 1125Package::extractItem(const char *filesPath, const char *outName, int32_t idx, char outType) { 1126 char filename[1024]; 1127 UDataSwapper *ds; 1128 FILE *file; 1129 Item *pItem; 1130 int32_t fileLength; 1131 uint8_t itemCharset, outCharset; 1132 UBool itemIsBigEndian, outIsBigEndian; 1133 1134 if(idx<0 || itemCount<=idx) { 1135 return; 1136 } 1137 pItem=items+idx; 1138 1139 // swap the data to the outType 1140 // outType==0: don't swap 1141 if(outType!=0 && pItem->type!=outType) { 1142 // open the swapper 1143 UErrorCode errorCode=U_ZERO_ERROR; 1144 makeTypeProps(pItem->type, itemCharset, itemIsBigEndian); 1145 makeTypeProps(outType, outCharset, outIsBigEndian); 1146 ds=udata_openSwapper(itemIsBigEndian, itemCharset, outIsBigEndian, outCharset, &errorCode); 1147 if(U_FAILURE(errorCode)) { 1148 fprintf(stderr, "icupkg: udata_openSwapper(item %ld) failed - %s\n", 1149 (long)idx, u_errorName(errorCode)); 1150 exit(errorCode); 1151 } 1152 1153 ds->printError=printPackageError; 1154 ds->printErrorContext=stderr; 1155 1156 // swap the item from its platform properties to the desired ones 1157 udata_swap(ds, pItem->data, pItem->length, pItem->data, &errorCode); 1158 if(U_FAILURE(errorCode)) { 1159 fprintf(stderr, "icupkg: udata_swap(item %ld) failed - %s\n", (long)idx, u_errorName(errorCode)); 1160 exit(errorCode); 1161 } 1162 udata_closeSwapper(ds); 1163 pItem->type=outType; 1164 } 1165 1166 // create the file and write its contents 1167 makeFullFilenameAndDirs(filesPath, outName, filename, (int32_t)sizeof(filename)); 1168 file=fopen(filename, "wb"); 1169 if(file==NULL) { 1170 fprintf(stderr, "icupkg: unable to create file \"%s\"\n", filename); 1171 exit(U_FILE_ACCESS_ERROR); 1172 } 1173 fileLength=(int32_t)fwrite(pItem->data, 1, pItem->length, file); 1174 1175 if(ferror(file) || fileLength!=pItem->length) { 1176 fprintf(stderr, "icupkg: unable to write complete file \"%s\"\n", filename); 1177 exit(U_FILE_ACCESS_ERROR); 1178 } 1179 fclose(file); 1180} 1181 1182void 1183Package::extractItem(const char *filesPath, int32_t idx, char outType) { 1184 extractItem(filesPath, items[idx].name, idx, outType); 1185} 1186 1187void 1188Package::extractItems(const char *filesPath, const char *pattern, char outType) { 1189 int32_t idx; 1190 1191 findItems(pattern); 1192 while((idx=findNextItem())>=0) { 1193 extractItem(filesPath, idx, outType); 1194 } 1195} 1196 1197void 1198Package::extractItems(const char *filesPath, const Package &listPkg, char outType) { 1199 const Item *pItem; 1200 int32_t i; 1201 1202 for(pItem=listPkg.items, i=0; i<listPkg.itemCount; ++pItem, ++i) { 1203 extractItems(filesPath, pItem->name, outType); 1204 } 1205} 1206 1207int32_t 1208Package::getItemCount() const { 1209 return itemCount; 1210} 1211 1212const Item * 1213Package::getItem(int32_t idx) const { 1214 if (0 <= idx && idx < itemCount) { 1215 return &items[idx]; 1216 } 1217 return NULL; 1218} 1219 1220void 1221Package::checkDependency(void *context, const char *itemName, const char *targetName) { 1222 // check dependency: make sure the target item is in the package 1223 Package *me=(Package *)context; 1224 if(me->findItem(targetName)<0) { 1225 me->isMissingItems=TRUE; 1226 fprintf(stderr, "Item %s depends on missing item %s\n", itemName, targetName); 1227 } 1228} 1229 1230UBool 1231Package::checkDependencies() { 1232 isMissingItems=FALSE; 1233 enumDependencies(this, checkDependency); 1234 return (UBool)!isMissingItems; 1235} 1236 1237void 1238Package::enumDependencies(void *context, CheckDependency check) { 1239 int32_t i; 1240 1241 for(i=0; i<itemCount; ++i) { 1242 enumDependencies(items+i, context, check); 1243 } 1244} 1245 1246char * 1247Package::allocString(UBool in, int32_t length) { 1248 char *p; 1249 int32_t top; 1250 1251 if(in) { 1252 top=inStringTop; 1253 p=inStrings+top; 1254 } else { 1255 top=outStringTop; 1256 p=outStrings+top; 1257 } 1258 top+=length+1; 1259 1260 if(top>STRING_STORE_SIZE) { 1261 fprintf(stderr, "icupkg: string storage overflow\n"); 1262 exit(U_BUFFER_OVERFLOW_ERROR); 1263 } 1264 if(in) { 1265 inStringTop=top; 1266 } else { 1267 outStringTop=top; 1268 } 1269 return p; 1270} 1271 1272void 1273Package::sortItems() { 1274 UErrorCode errorCode=U_ZERO_ERROR; 1275 uprv_sortArray(items, itemCount, (int32_t)sizeof(Item), compareItems, NULL, FALSE, &errorCode); 1276 if(U_FAILURE(errorCode)) { 1277 fprintf(stderr, "icupkg: sorting item names failed - %s\n", u_errorName(errorCode)); 1278 exit(errorCode); 1279 } 1280} 1281 1282void Package::setItemCapacity(int32_t max) 1283{ 1284 if(max<=itemMax) { 1285 return; 1286 } 1287 Item *newItems = (Item*)uprv_malloc(max * sizeof(items[0])); 1288 Item *oldItems = items; 1289 if(newItems == NULL) { 1290 fprintf(stderr, "icupkg: Out of memory trying to allocate %lu bytes for %d items\n", 1291 (unsigned long)max*sizeof(items[0]), max); 1292 exit(U_MEMORY_ALLOCATION_ERROR); 1293 } 1294 if(items && itemCount>0) { 1295 uprv_memcpy(newItems, items, itemCount*sizeof(items[0])); 1296 } 1297 itemMax = max; 1298 items = newItems; 1299 uprv_free(oldItems); 1300} 1301 1302void Package::ensureItemCapacity() 1303{ 1304 if((itemCount+1)>itemMax) { 1305 setItemCapacity(itemCount+kItemsChunk); 1306 } 1307} 1308 1309U_NAMESPACE_END 1310