1/* 2******************************************************************************* 3* 4* Copyright (C) 2003-2014, International Business Machines 5* Corporation and others. All Rights Reserved. 6* 7******************************************************************************* 8* file name: icuswap.cpp 9* encoding: US-ASCII 10* tab size: 8 (not used) 11* indentation:4 12* 13* created on: 2003aug08 14* created by: Markus W. Scherer 15* 16* This tool takes an ICU data file and "swaps" it, that is, changes its 17* platform properties between big-/little-endianness and ASCII/EBCDIC charset 18* families. 19* The modified data file is written to a new file. 20* Useful as an install-time tool for shipping only one flavor of ICU data 21* and preparing data files for the target platform. 22* Will not work with data DLLs (shared libraries). 23*/ 24 25#include "unicode/utypes.h" 26#include "unicode/putil.h" 27#include "unicode/udata.h" 28#include "cmemory.h" 29#include "cstring.h" 30#include "uinvchar.h" 31#include "uarrsort.h" 32#include "ucmndata.h" 33#include "udataswp.h" 34#include "swapimpl.h" 35#include "toolutil.h" 36#include "uoptions.h" 37 38#include <stdio.h> 39#include <stdlib.h> 40#include <string.h> 41 42/* definitions */ 43 44#define DEFAULT_PADDING_LENGTH 15 45 46static UOption options[]={ 47 UOPTION_HELP_H, 48 UOPTION_HELP_QUESTION_MARK, 49 UOPTION_DEF("type", 't', UOPT_REQUIRES_ARG) 50}; 51 52enum { 53 OPT_HELP_H, 54 OPT_HELP_QUESTION_MARK, 55 OPT_OUT_TYPE 56}; 57 58static int32_t 59fileSize(FILE *f) { 60 int32_t size; 61 62 fseek(f, 0, SEEK_END); 63 size=(int32_t)ftell(f); 64 fseek(f, 0, SEEK_SET); 65 return size; 66} 67 68/** 69 * Swap an ICU .dat package, including swapping of enclosed items. 70 */ 71U_CFUNC int32_t U_CALLCONV 72udata_swapPackage(const char *inFilename, const char *outFilename, 73 const UDataSwapper *ds, 74 const void *inData, int32_t length, void *outData, 75 UErrorCode *pErrorCode); 76 77U_CDECL_BEGIN 78static void U_CALLCONV 79printError(void *context, const char *fmt, va_list args) { 80 vfprintf((FILE *)context, fmt, args); 81} 82U_CDECL_END 83 84static int 85printUsage(const char *pname, UBool ishelp) { 86 fprintf(stderr, 87 "%csage: %s [ -h, -?, --help ] -tl|-tb|-te|--type=b|... infilename outfilename\n", 88 ishelp ? 'U' : 'u', pname); 89 if(ishelp) { 90 fprintf(stderr, 91 "\nOptions: -h, -?, --help print this message and exit\n" 92 " Read the input file, swap its platform properties according\n" 93 " to the -t or --type option, and write the result to the output file.\n" 94 " -tl change to little-endian/ASCII charset family\n" 95 " -tb change to big-endian/ASCII charset family\n" 96 " -te change to big-endian/EBCDIC charset family\n"); 97 } 98 99 return !ishelp; 100} 101 102extern int 103main(int argc, char *argv[]) { 104 FILE *in, *out; 105 const char *pname; 106 char *data; 107 int32_t length; 108 UBool ishelp; 109 int rc; 110 111 UDataSwapper *ds; 112 const UDataInfo *pInfo; 113 UErrorCode errorCode; 114 uint8_t outCharset; 115 UBool outIsBigEndian; 116 117 U_MAIN_INIT_ARGS(argc, argv); 118 119 fprintf(stderr, "Warning: icuswap is an obsolete tool and it will be removed in the next ICU release.\nPlease use the icupkg tool instead.\n"); 120 121 /* get the program basename */ 122 pname=strrchr(argv[0], U_FILE_SEP_CHAR); 123 if(pname==NULL) { 124 pname=strrchr(argv[0], '/'); 125 } 126 if(pname!=NULL) { 127 ++pname; 128 } else { 129 pname=argv[0]; 130 } 131 132 argc=u_parseArgs(argc, argv, UPRV_LENGTHOF(options), options); 133 ishelp=options[OPT_HELP_H].doesOccur || options[OPT_HELP_QUESTION_MARK].doesOccur; 134 if(ishelp || argc!=3) { 135 return printUsage(pname, ishelp); 136 } 137 138 /* parse the output type option */ 139 data=(char *)options[OPT_OUT_TYPE].value; 140 if(data[0]==0 || data[1]!=0) { 141 /* the type must be exactly one letter */ 142 return printUsage(pname, FALSE); 143 } 144 switch(data[0]) { 145 case 'l': 146 outIsBigEndian=FALSE; 147 outCharset=U_ASCII_FAMILY; 148 break; 149 case 'b': 150 outIsBigEndian=TRUE; 151 outCharset=U_ASCII_FAMILY; 152 break; 153 case 'e': 154 outIsBigEndian=TRUE; 155 outCharset=U_EBCDIC_FAMILY; 156 break; 157 default: 158 return printUsage(pname, FALSE); 159 } 160 161 in=out=NULL; 162 data=NULL; 163 164 /* open the input file, get its length, allocate memory for it, read the file */ 165 in=fopen(argv[1], "rb"); 166 if(in==NULL) { 167 fprintf(stderr, "%s: unable to open input file \"%s\"\n", pname, argv[1]); 168 rc=2; 169 goto done; 170 } 171 172 length=fileSize(in); 173 if(length<DEFAULT_PADDING_LENGTH) { 174 fprintf(stderr, "%s: empty input file \"%s\"\n", pname, argv[1]); 175 rc=2; 176 goto done; 177 } 178 179 /* 180 * +15: udata_swapPackage() may need to add a few padding bytes to the 181 * last item if charset swapping is done, 182 * because the last item may be resorted into the middle and then needs 183 * additional padding bytes 184 */ 185 data=(char *)malloc(length+DEFAULT_PADDING_LENGTH); 186 if(data==NULL) { 187 fprintf(stderr, "%s: error allocating memory for \"%s\"\n", pname, argv[1]); 188 rc=2; 189 goto done; 190 } 191 192 /* set the last 15 bytes to the usual padding byte, see udata_swapPackage() */ 193 uprv_memset(data+length-DEFAULT_PADDING_LENGTH, 0xaa, DEFAULT_PADDING_LENGTH); 194 195 if(length!=(int32_t)fread(data, 1, length, in)) { 196 fprintf(stderr, "%s: error reading \"%s\"\n", pname, argv[1]); 197 rc=3; 198 goto done; 199 } 200 201 fclose(in); 202 in=NULL; 203 204 /* swap the data in-place */ 205 errorCode=U_ZERO_ERROR; 206 ds=udata_openSwapperForInputData(data, length, outIsBigEndian, outCharset, &errorCode); 207 if(U_FAILURE(errorCode)) { 208 fprintf(stderr, "%s: udata_openSwapperForInputData(\"%s\") failed - %s\n", 209 pname, argv[1], u_errorName(errorCode)); 210 rc=4; 211 goto done; 212 } 213 214 ds->printError=printError; 215 ds->printErrorContext=stderr; 216 217 /* speculative cast, protected by the following length check */ 218 pInfo=(const UDataInfo *)((const char *)data+4); 219 220 if( length>=20 && 221 pInfo->dataFormat[0]==0x43 && /* dataFormat="CmnD" */ 222 pInfo->dataFormat[1]==0x6d && 223 pInfo->dataFormat[2]==0x6e && 224 pInfo->dataFormat[3]==0x44 225 ) { 226 /* 227 * swap the .dat package 228 * udata_swapPackage() needs to rename ToC name entries from the old package 229 * name to the new one. 230 * We pass it the filenames, and udata_swapPackage() will extract the 231 * package names. 232 */ 233 length=udata_swapPackage(argv[1], argv[2], ds, data, length, data, &errorCode); 234 udata_closeSwapper(ds); 235 if(U_FAILURE(errorCode)) { 236 fprintf(stderr, "%s: udata_swapPackage(\"%s\") failed - %s\n", 237 pname, argv[1], u_errorName(errorCode)); 238 rc=4; 239 goto done; 240 } 241 } else { 242 /* swap the data, which is not a .dat package */ 243 length=udata_swap(ds, data, length, data, &errorCode); 244 udata_closeSwapper(ds); 245 if(U_FAILURE(errorCode)) { 246 fprintf(stderr, "%s: udata_swap(\"%s\") failed - %s\n", 247 pname, argv[1], u_errorName(errorCode)); 248 rc=4; 249 goto done; 250 } 251 } 252 253 out=fopen(argv[2], "wb"); 254 if(out==NULL) { 255 fprintf(stderr, "%s: unable to open output file \"%s\"\n", pname, argv[2]); 256 rc=5; 257 goto done; 258 } 259 260 if(length!=(int32_t)fwrite(data, 1, length, out)) { 261 fprintf(stderr, "%s: error writing \"%s\"\n", pname, argv[2]); 262 rc=6; 263 goto done; 264 } 265 266 fclose(out); 267 out=NULL; 268 269 /* all done */ 270 rc=0; 271 272done: 273 if(in!=NULL) { 274 fclose(in); 275 } 276 if(out!=NULL) { 277 fclose(out); 278 } 279 if(data!=NULL) { 280 free(data); 281 } 282 return rc; 283} 284 285/* swap .dat package files -------------------------------------------------- */ 286 287static int32_t 288extractPackageName(const UDataSwapper *ds, const char *filename, 289 char pkg[], int32_t capacity, 290 UErrorCode *pErrorCode) { 291 const char *basename; 292 int32_t len; 293 294 if(U_FAILURE(*pErrorCode)) { 295 return 0; 296 } 297 298 basename=findBasename(filename); 299 len=(int32_t)uprv_strlen(basename)-4; /* -4: subtract the length of ".dat" */ 300 301 if(len<=0 || 0!=uprv_strcmp(basename+len, ".dat")) { 302 udata_printError(ds, "udata_swapPackage(): \"%s\" is not recognized as a package filename (must end with .dat)\n", 303 basename); 304 *pErrorCode=U_ILLEGAL_ARGUMENT_ERROR; 305 return 0; 306 } 307 308 if(len>=capacity) { 309 udata_printError(ds, "udata_swapPackage(): the package name \"%s\" is too long (>=%ld)\n", 310 (long)capacity); 311 *pErrorCode=U_ILLEGAL_ARGUMENT_ERROR; 312 return 0; 313 } 314 315 uprv_memcpy(pkg, basename, len); 316 pkg[len]=0; 317 return len; 318} 319 320struct ToCEntry { 321 uint32_t nameOffset, inOffset, outOffset, length; 322}; 323 324U_CDECL_BEGIN 325static int32_t U_CALLCONV 326compareToCEntries(const void *context, const void *left, const void *right) { 327 const char *chars=(const char *)context; 328 return (int32_t)uprv_strcmp(chars+((const ToCEntry *)left)->nameOffset, 329 chars+((const ToCEntry *)right)->nameOffset); 330} 331U_CDECL_END 332 333U_CFUNC int32_t U_CALLCONV 334udata_swapPackage(const char *inFilename, const char *outFilename, 335 const UDataSwapper *ds, 336 const void *inData, int32_t length, void *outData, 337 UErrorCode *pErrorCode) { 338 const UDataInfo *pInfo; 339 int32_t headerSize; 340 341 const uint8_t *inBytes; 342 uint8_t *outBytes; 343 344 uint32_t itemCount, offset, i; 345 int32_t itemLength; 346 347 const UDataOffsetTOCEntry *inEntries; 348 UDataOffsetTOCEntry *outEntries; 349 350 ToCEntry *table; 351 352 char inPkgName[32], outPkgName[32]; 353 int32_t inPkgNameLength, outPkgNameLength; 354 355 /* udata_swapDataHeader checks the arguments */ 356 headerSize=udata_swapDataHeader(ds, inData, length, outData, pErrorCode); 357 if(pErrorCode==NULL || U_FAILURE(*pErrorCode)) { 358 return 0; 359 } 360 361 /* check data format and format version */ 362 pInfo=(const UDataInfo *)((const char *)inData+4); 363 if(!( 364 pInfo->dataFormat[0]==0x43 && /* dataFormat="CmnD" */ 365 pInfo->dataFormat[1]==0x6d && 366 pInfo->dataFormat[2]==0x6e && 367 pInfo->dataFormat[3]==0x44 && 368 pInfo->formatVersion[0]==1 369 )) { 370 udata_printError(ds, "udata_swapPackage(): data format %02x.%02x.%02x.%02x (format version %02x) is not recognized as an ICU .dat package\n", 371 pInfo->dataFormat[0], pInfo->dataFormat[1], 372 pInfo->dataFormat[2], pInfo->dataFormat[3], 373 pInfo->formatVersion[0]); 374 *pErrorCode=U_UNSUPPORTED_ERROR; 375 return 0; 376 } 377 378 /* 379 * We need to change the ToC name entries so that they have the correct 380 * package name prefix. 381 * Extract the package names from the in/out filenames. 382 */ 383 inPkgNameLength=extractPackageName( 384 ds, inFilename, 385 inPkgName, (int32_t)sizeof(inPkgName), 386 pErrorCode); 387 outPkgNameLength=extractPackageName( 388 ds, outFilename, 389 outPkgName, (int32_t)sizeof(outPkgName), 390 pErrorCode); 391 if(U_FAILURE(*pErrorCode)) { 392 return 0; 393 } 394 395 /* 396 * It is possible to work with inPkgNameLength!=outPkgNameLength, 397 * but then the length of the data file would change more significantly, 398 * which we are not currently prepared for. 399 */ 400 if(inPkgNameLength!=outPkgNameLength) { 401 udata_printError(ds, "udata_swapPackage(): the package names \"%s\" and \"%s\" must have the same length\n", 402 inPkgName, outPkgName); 403 *pErrorCode=U_ILLEGAL_ARGUMENT_ERROR; 404 return 0; 405 } 406 407 inBytes=(const uint8_t *)inData+headerSize; 408 inEntries=(const UDataOffsetTOCEntry *)(inBytes+4); 409 410 if(length<0) { 411 /* preflighting */ 412 itemCount=ds->readUInt32(*(const uint32_t *)inBytes); 413 if(itemCount==0) { 414 /* no items: count only the item count and return */ 415 return headerSize+4; 416 } 417 418 /* read the last item's offset and preflight it */ 419 offset=ds->readUInt32(inEntries[itemCount-1].dataOffset); 420 itemLength=udata_swap(ds, inBytes+offset, -1, NULL, pErrorCode); 421 422 if(U_SUCCESS(*pErrorCode)) { 423 return headerSize+offset+(uint32_t)itemLength; 424 } else { 425 return 0; 426 } 427 } else { 428 /* check that the itemCount fits, then the ToC table, then at least the header of the last item */ 429 length-=headerSize; 430 if(length<4) { 431 /* itemCount does not fit */ 432 offset=0xffffffff; 433 itemCount=0; /* make compilers happy */ 434 } else { 435 itemCount=ds->readUInt32(*(const uint32_t *)inBytes); 436 if(itemCount==0) { 437 offset=4; 438 } else if((uint32_t)length<(4+8*itemCount)) { 439 /* ToC table does not fit */ 440 offset=0xffffffff; 441 } else { 442 /* offset of the last item plus at least 20 bytes for its header */ 443 offset=20+ds->readUInt32(inEntries[itemCount-1].dataOffset); 444 } 445 } 446 if((uint32_t)length<offset) { 447 udata_printError(ds, "udata_swapPackage(): too few bytes (%d after header) for a .dat package\n", 448 length); 449 *pErrorCode=U_INDEX_OUTOFBOUNDS_ERROR; 450 return 0; 451 } 452 453 outBytes=(uint8_t *)outData+headerSize; 454 455 /* swap the item count */ 456 ds->swapArray32(ds, inBytes, 4, outBytes, pErrorCode); 457 458 if(itemCount==0) { 459 /* no items: just return now */ 460 return headerSize+4; 461 } 462 463 /* swap the item name strings */ 464 offset=4+8*itemCount; 465 itemLength=(int32_t)(ds->readUInt32(inEntries[0].dataOffset)-offset); 466 udata_swapInvStringBlock(ds, inBytes+offset, itemLength, outBytes+offset, pErrorCode); 467 if(U_FAILURE(*pErrorCode)) { 468 udata_printError(ds, "udata_swapPackage() failed to swap the data item name strings\n"); 469 return 0; 470 } 471 /* keep offset and itemLength in case we allocate and copy the strings below */ 472 473 /* swap the package names into the output charset */ 474 if(ds->outCharset!=U_CHARSET_FAMILY) { 475 UDataSwapper *ds2; 476 ds2=udata_openSwapper(TRUE, U_CHARSET_FAMILY, TRUE, ds->outCharset, pErrorCode); 477 ds2->swapInvChars(ds2, inPkgName, inPkgNameLength, inPkgName, pErrorCode); 478 ds2->swapInvChars(ds2, outPkgName, outPkgNameLength, outPkgName, pErrorCode); 479 udata_closeSwapper(ds2); 480 if(U_FAILURE(*pErrorCode)) { 481 udata_printError(ds, "udata_swapPackage() failed to swap the input/output package names\n"); 482 } 483 } 484 485 /* change the prefix of each ToC entry name from the old to the new package name */ 486 { 487 char *entryName; 488 489 for(i=0; i<itemCount; ++i) { 490 entryName=(char *)inBytes+ds->readUInt32(inEntries[i].nameOffset); 491 492 if(0==uprv_memcmp(entryName, inPkgName, inPkgNameLength)) { 493 uprv_memcpy(entryName, outPkgName, inPkgNameLength); 494 } else { 495 udata_printError(ds, "udata_swapPackage() failed: ToC item %ld does not have the input package name as a prefix\n", 496 (long)i); 497 *pErrorCode=U_INVALID_FORMAT_ERROR; 498 return 0; 499 } 500 } 501 } 502 503 /* 504 * Allocate the ToC table and, if necessary, a temporary buffer for 505 * pseudo-in-place swapping. 506 * 507 * We cannot swap in-place because: 508 * 509 * 1. If the swapping of an item fails mid-way, then in-place swapping 510 * has destroyed its data. 511 * Out-of-place swapping allows us to then copy its original data. 512 * 513 * 2. If swapping changes the charset family, then we must resort 514 * not only the ToC table but also the data items themselves. 515 * This requires a permutation and is best done with separate in/out 516 * buffers. 517 * 518 * We swapped the strings above to avoid the malloc below if string swapping fails. 519 */ 520 if(inData==outData) { 521 /* +15: prepare for extra padding of a newly-last item */ 522 table=(ToCEntry *)uprv_malloc(itemCount*sizeof(ToCEntry)+length+DEFAULT_PADDING_LENGTH); 523 if(table!=NULL) { 524 outBytes=(uint8_t *)(table+itemCount); 525 526 /* copy the item count and the swapped strings */ 527 uprv_memcpy(outBytes, inBytes, 4); 528 uprv_memcpy(outBytes+offset, inBytes+offset, itemLength); 529 } 530 } else { 531 table=(ToCEntry *)uprv_malloc(itemCount*sizeof(ToCEntry)); 532 } 533 if(table==NULL) { 534 udata_printError(ds, "udata_swapPackage(): out of memory allocating %d bytes\n", 535 inData==outData ? 536 itemCount*sizeof(ToCEntry)+length+DEFAULT_PADDING_LENGTH : 537 itemCount*sizeof(ToCEntry)); 538 *pErrorCode=U_MEMORY_ALLOCATION_ERROR; 539 return 0; 540 } 541 outEntries=(UDataOffsetTOCEntry *)(outBytes+4); 542 543 /* read the ToC table */ 544 for(i=0; i<itemCount; ++i) { 545 table[i].nameOffset=ds->readUInt32(inEntries[i].nameOffset); 546 table[i].inOffset=ds->readUInt32(inEntries[i].dataOffset); 547 if(i>0) { 548 table[i-1].length=table[i].inOffset-table[i-1].inOffset; 549 } 550 } 551 table[itemCount-1].length=(uint32_t)length-table[itemCount-1].inOffset; 552 553 if(ds->inCharset==ds->outCharset) { 554 /* no charset swapping, no resorting: keep item offsets the same */ 555 for(i=0; i<itemCount; ++i) { 556 table[i].outOffset=table[i].inOffset; 557 } 558 } else { 559 /* charset swapping: resort items by their swapped names */ 560 561 /* 562 * Before the actual sorting, we need to make sure that each item 563 * has a length that is a multiple of 16 bytes so that all items 564 * are 16-aligned. 565 * Only the old last item may be missing up to 15 padding bytes. 566 * Add padding bytes for it. 567 * Since the icuswap main() function has already allocated enough 568 * input buffer space and set the last 15 bytes there to 0xaa, 569 * we only need to increase the total data length and the length 570 * of the last item here. 571 */ 572 if((length&0xf)!=0) { 573 int32_t delta=16-(length&0xf); 574 length+=delta; 575 table[itemCount-1].length+=(uint32_t)delta; 576 } 577 578 /* Save the offset before we sort the TOC. */ 579 offset=table[0].inOffset; 580 /* sort the TOC entries */ 581 uprv_sortArray(table, (int32_t)itemCount, (int32_t)sizeof(ToCEntry), 582 compareToCEntries, outBytes, FALSE, pErrorCode); 583 584 /* 585 * Note: Before sorting, the inOffset values were in order. 586 * Now the outOffset values are in order. 587 */ 588 589 /* assign outOffset values */ 590 for(i=0; i<itemCount; ++i) { 591 table[i].outOffset=offset; 592 offset+=table[i].length; 593 } 594 } 595 596 /* write the output ToC table */ 597 for(i=0; i<itemCount; ++i) { 598 ds->writeUInt32(&outEntries[i].nameOffset, table[i].nameOffset); 599 ds->writeUInt32(&outEntries[i].dataOffset, table[i].outOffset); 600 } 601 602 /* swap each data item */ 603 for(i=0; i<itemCount; ++i) { 604 /* first copy the item bytes to make sure that unreachable bytes are copied */ 605 uprv_memcpy(outBytes+table[i].outOffset, inBytes+table[i].inOffset, table[i].length); 606 607 /* swap the item */ 608 udata_swap(ds, inBytes+table[i].inOffset, (int32_t)table[i].length, 609 outBytes+table[i].outOffset, pErrorCode); 610 611 if(U_FAILURE(*pErrorCode)) { 612 if(ds->outCharset==U_CHARSET_FAMILY) { 613 udata_printError(ds, "warning: udata_swapPackage() failed to swap item \"%s\"\n" 614 " at inOffset 0x%x length 0x%x - %s\n" 615 " the data item will be copied, not swapped\n\n", 616 (char *)outBytes+table[i].nameOffset, 617 table[i].inOffset, table[i].length, u_errorName(*pErrorCode)); 618 } else { 619 udata_printError(ds, "warning: udata_swapPackage() failed to swap an item\n" 620 " at inOffset 0x%x length 0x%x - %s\n" 621 " the data item will be copied, not swapped\n\n", 622 table[i].inOffset, table[i].length, u_errorName(*pErrorCode)); 623 } 624 /* reset the error code, copy the data item, and continue */ 625 *pErrorCode=U_ZERO_ERROR; 626 uprv_memcpy(outBytes+table[i].outOffset, inBytes+table[i].inOffset, table[i].length); 627 } 628 } 629 630 if(inData==outData) { 631 /* copy the data from the temporary buffer to the in-place buffer */ 632 uprv_memcpy((uint8_t *)outData+headerSize, outBytes, length); 633 } 634 uprv_free(table); 635 636 return headerSize+length; 637 } 638} 639 640/* 641 * Hey, Emacs, please set the following: 642 * 643 * Local Variables: 644 * indent-tabs-mode: nil 645 * End: 646 * 647 */ 648