1/****************************************************************************** 2 * Copyright (C) 2009-2013, International Business Machines 3 * Corporation and others. All Rights Reserved. 4 ******************************************************************************* 5 */ 6#include "unicode/utypes.h" 7 8#if U_PLATFORM_HAS_WIN32_API 9# define VC_EXTRALEAN 10# define WIN32_LEAN_AND_MEAN 11# define NOUSER 12# define NOSERVICE 13# define NOIME 14# define NOMCX 15#include <windows.h> 16#include <time.h> 17# ifdef __GNUC__ 18# define WINDOWS_WITH_GNUC 19# endif 20#endif 21 22#if U_PLATFORM_IS_LINUX_BASED && U_HAVE_ELF_H 23# define U_ELF 24#endif 25 26#ifdef U_ELF 27# include <elf.h> 28# if defined(ELFCLASS64) 29# define U_ELF64 30# endif 31 /* Old elf.h headers may not have EM_X86_64, or have EM_X8664 instead. */ 32# ifndef EM_X86_64 33# define EM_X86_64 62 34# endif 35# define ICU_ENTRY_OFFSET 0 36#endif 37 38#include <stdio.h> 39#include <stdlib.h> 40#include "unicode/putil.h" 41#include "cmemory.h" 42#include "cstring.h" 43#include "filestrm.h" 44#include "toolutil.h" 45#include "unicode/uclean.h" 46#include "uoptions.h" 47#include "pkg_genc.h" 48 49#define MAX_COLUMN ((uint32_t)(0xFFFFFFFFU)) 50 51#define HEX_0X 0 /* 0x1234 */ 52#define HEX_0H 1 /* 01234h */ 53 54/* prototypes --------------------------------------------------------------- */ 55static void 56getOutFilename(const char *inFilename, const char *destdir, char *outFilename, char *entryName, const char *newSuffix, const char *optFilename); 57 58static uint32_t 59write8(FileStream *out, uint8_t byte, uint32_t column); 60 61static uint32_t 62write32(FileStream *out, uint32_t byte, uint32_t column); 63 64#if U_PLATFORM == U_PF_OS400 65static uint32_t 66write8str(FileStream *out, uint8_t byte, uint32_t column); 67#endif 68/* -------------------------------------------------------------------------- */ 69 70/* 71Creating Template Files for New Platforms 72 73Let the cc compiler help you get started. 74Compile this program 75 const unsigned int x[5] = {1, 2, 0xdeadbeef, 0xffffffff, 16}; 76with the -S option to produce assembly output. 77 78For example, this will generate array.s: 79gcc -S array.c 80 81This will produce a .s file that may look like this: 82 83 .file "array.c" 84 .version "01.01" 85gcc2_compiled.: 86 .globl x 87 .section .rodata 88 .align 4 89 .type x,@object 90 .size x,20 91x: 92 .long 1 93 .long 2 94 .long -559038737 95 .long -1 96 .long 16 97 .ident "GCC: (GNU) 2.96 20000731 (Red Hat Linux 7.1 2.96-85)" 98 99which gives a starting point that will compile, and can be transformed 100to become the template, generally with some consulting of as docs and 101some experimentation. 102 103If you want ICU to automatically use this assembly, you should 104specify "GENCCODE_ASSEMBLY=-a name" in the specific config/mh-* file, 105where the name is the compiler or platform that you used in this 106assemblyHeader data structure. 107*/ 108static const struct AssemblyType { 109 const char *name; 110 const char *header; 111 const char *beginLine; 112 const char *footer; 113 int8_t hexType; /* HEX_0X or HEX_0h */ 114} assemblyHeader[] = { 115 // For gcc assemblers, the meaning of .align changes depending on the 116 // hardware, so we use .balign 16 which always means 16 bytes. 117 // https://sourceware.org/binutils/docs/as/Pseudo-Ops.html 118 {"gcc", 119 ".globl %s\n" 120 "\t.section .note.GNU-stack,\"\",%%progbits\n" 121 "\t.section .rodata\n" 122 "\t.balign 16\n" 123 /* The 3 lines below are added for Chrome. */ 124 "#ifdef U_HIDE_DATA_SYMBOL\n" 125 "\t.hidden %s\n" 126 "#endif\n" 127 "\t.type %s,%%object\n" 128 "%s:\n\n", 129 130 ".long ","",HEX_0X 131 }, 132 {"gcc-darwin", 133 /*"\t.section __TEXT,__text,regular,pure_instructions\n" 134 "\t.section __TEXT,__picsymbolstub1,symbol_stubs,pure_instructions,32\n"*/ 135 ".globl _%s\n" 136 /* The 3 lines below are added for Chrome. */ 137 "#ifdef U_HIDE_DATA_SYMBOL\n" 138 "\t.private_extern _%s\n" 139 "#endif\n" 140 "\t.data\n" 141 "\t.const\n" 142 "\t.balign 16\n" 143 "_%s:\n\n", 144 145 ".long ","",HEX_0X 146 }, 147 {"gcc-cygwin", 148 ".globl _%s\n" 149 "\t.section .rodata\n" 150 "\t.balign 16\n" 151 "_%s:\n\n", 152 153 ".long ","",HEX_0X 154 }, 155 {"gcc-mingw64", 156 ".globl %s\n" 157 "\t.section .rodata\n" 158 "\t.balign 16\n" 159 "%s:\n\n", 160 161 ".long ","",HEX_0X 162 }, 163// 16 bytes alignment. 164// http://docs.oracle.com/cd/E19641-01/802-1947/802-1947.pdf 165 {"sun", 166 "\t.section \".rodata\"\n" 167 "\t.align 16\n" 168 ".globl %s\n" 169 "%s:\n", 170 171 ".word ","",HEX_0X 172 }, 173// 16 bytes alignment for sun-x86. 174// http://docs.oracle.com/cd/E19963-01/html/821-1608/eoiyg.html 175 {"sun-x86", 176 "Drodata.rodata:\n" 177 "\t.type Drodata.rodata,@object\n" 178 "\t.size Drodata.rodata,0\n" 179 "\t.globl %s\n" 180 "\t.align 16\n" 181 "%s:\n", 182 183 ".4byte ","",HEX_0X 184 }, 185// 1<<4 bit alignment for aix. 186// http://pic.dhe.ibm.com/infocenter/aix/v6r1/index.jsp?topic=%2Fcom.ibm.aix.aixassem%2Fdoc%2Falangref%2Fidalangref_csect_pseudoop.htm 187 {"xlc", 188 ".globl %s{RO}\n" 189 "\t.toc\n" 190 "%s:\n" 191 "\t.csect %s{RO}, 4\n", 192 193 ".long ","",HEX_0X 194 }, 195 {"aCC-ia64", 196 "\t.file \"%s.s\"\n" 197 "\t.type %s,@object\n" 198 "\t.global %s\n" 199 "\t.secalias .abe$0.rodata, \".rodata\"\n" 200 "\t.section .abe$0.rodata = \"a\", \"progbits\"\n" 201 "\t.align 16\n" 202 "%s::\t", 203 204 "data4 ","",HEX_0X 205 }, 206 {"aCC-parisc", 207 "\t.SPACE $TEXT$\n" 208 "\t.SUBSPA $LIT$\n" 209 "%s\n" 210 "\t.EXPORT %s\n" 211 "\t.ALIGN 16\n", 212 213 ".WORD ","",HEX_0X 214 }, 215// align 16 bytes 216// http://msdn.microsoft.com/en-us/library/dwa9fwef.aspx 217 { "masm", 218 "\tTITLE %s\n" 219 "; generated by genccode\n" 220 ".386\n" 221 ".model flat\n" 222 "\tPUBLIC _%s\n" 223 "ICUDATA_%s\tSEGMENT READONLY PARA PUBLIC FLAT 'DATA'\n" 224 "\tALIGN 16\n" 225 "_%s\tLABEL DWORD\n", 226 "\tDWORD ","\nICUDATA_%s\tENDS\n\tEND\n",HEX_0H 227 } 228}; 229 230static int32_t assemblyHeaderIndex = -1; 231static int32_t hexType = HEX_0X; 232 233U_CAPI UBool U_EXPORT2 234checkAssemblyHeaderName(const char* optAssembly) { 235 int32_t idx; 236 assemblyHeaderIndex = -1; 237 for (idx = 0; idx < (int32_t)(sizeof(assemblyHeader)/sizeof(assemblyHeader[0])); idx++) { 238 if (uprv_strcmp(optAssembly, assemblyHeader[idx].name) == 0) { 239 assemblyHeaderIndex = idx; 240 hexType = assemblyHeader[idx].hexType; /* set the hex type */ 241 return TRUE; 242 } 243 } 244 245 return FALSE; 246} 247 248 249U_CAPI void U_EXPORT2 250printAssemblyHeadersToStdErr(void) { 251 int32_t idx; 252 fprintf(stderr, "%s", assemblyHeader[0].name); 253 for (idx = 1; idx < (int32_t)(sizeof(assemblyHeader)/sizeof(assemblyHeader[0])); idx++) { 254 fprintf(stderr, ", %s", assemblyHeader[idx].name); 255 } 256 fprintf(stderr, 257 ")\n"); 258} 259 260U_CAPI void U_EXPORT2 261writeAssemblyCode(const char *filename, const char *destdir, const char *optEntryPoint, const char *optFilename, char *outFilePath) { 262 uint32_t column = MAX_COLUMN; 263 char entry[64]; 264 uint32_t buffer[1024]; 265 char *bufferStr = (char *)buffer; 266 FileStream *in, *out; 267 size_t i, length; 268 269 in=T_FileStream_open(filename, "rb"); 270 if(in==NULL) { 271 fprintf(stderr, "genccode: unable to open input file %s\n", filename); 272 exit(U_FILE_ACCESS_ERROR); 273 } 274 275 getOutFilename(filename, destdir, bufferStr, entry, ".S", optFilename); 276 out=T_FileStream_open(bufferStr, "w"); 277 if(out==NULL) { 278 fprintf(stderr, "genccode: unable to open output file %s\n", bufferStr); 279 exit(U_FILE_ACCESS_ERROR); 280 } 281 282 if (outFilePath != NULL) { 283 uprv_strcpy(outFilePath, bufferStr); 284 } 285 286#ifdef WINDOWS_WITH_GNUC 287 /* Need to fix the file seperator character when using MinGW. */ 288 swapFileSepChar(outFilePath, U_FILE_SEP_CHAR, '/'); 289#endif 290 291 if(optEntryPoint != NULL) { 292 uprv_strcpy(entry, optEntryPoint); 293 uprv_strcat(entry, "_dat"); 294 } 295 296 /* turn dashes or dots in the entry name into underscores */ 297 length=uprv_strlen(entry); 298 for(i=0; i<length; ++i) { 299 if(entry[i]=='-' || entry[i]=='.') { 300 entry[i]='_'; 301 } 302 } 303 304 sprintf(bufferStr, assemblyHeader[assemblyHeaderIndex].header, 305 entry, entry, entry, entry, 306 entry, entry, entry, entry); 307 T_FileStream_writeLine(out, bufferStr); 308 T_FileStream_writeLine(out, assemblyHeader[assemblyHeaderIndex].beginLine); 309 310 for(;;) { 311 length=T_FileStream_read(in, buffer, sizeof(buffer)); 312 if(length==0) { 313 break; 314 } 315 if (length != sizeof(buffer)) { 316 /* pad with extra 0's when at the end of the file */ 317 for(i=0; i < (length % sizeof(uint32_t)); ++i) { 318 buffer[length+i] = 0; 319 } 320 } 321 for(i=0; i<(length/sizeof(buffer[0])); i++) { 322 column = write32(out, buffer[i], column); 323 } 324 } 325 326 T_FileStream_writeLine(out, "\n"); 327 328 sprintf(bufferStr, assemblyHeader[assemblyHeaderIndex].footer, 329 entry, entry, entry, entry, 330 entry, entry, entry, entry); 331 T_FileStream_writeLine(out, bufferStr); 332 333 if(T_FileStream_error(in)) { 334 fprintf(stderr, "genccode: file read error while generating from file %s\n", filename); 335 exit(U_FILE_ACCESS_ERROR); 336 } 337 338 if(T_FileStream_error(out)) { 339 fprintf(stderr, "genccode: file write error while generating from file %s\n", filename); 340 exit(U_FILE_ACCESS_ERROR); 341 } 342 343 T_FileStream_close(out); 344 T_FileStream_close(in); 345} 346 347U_CAPI void U_EXPORT2 348writeCCode(const char *filename, const char *destdir, const char *optName, const char *optFilename, char *outFilePath) { 349 uint32_t column = MAX_COLUMN; 350 char buffer[4096], entry[64]; 351 FileStream *in, *out; 352 size_t i, length; 353 354 in=T_FileStream_open(filename, "rb"); 355 if(in==NULL) { 356 fprintf(stderr, "genccode: unable to open input file %s\n", filename); 357 exit(U_FILE_ACCESS_ERROR); 358 } 359 360 if(optName != NULL) { /* prepend 'icudt28_' */ 361 strcpy(entry, optName); 362 strcat(entry, "_"); 363 } else { 364 entry[0] = 0; 365 } 366 367 getOutFilename(filename, destdir, buffer, entry+uprv_strlen(entry), ".c", optFilename); 368 if (outFilePath != NULL) { 369 uprv_strcpy(outFilePath, buffer); 370 } 371 out=T_FileStream_open(buffer, "w"); 372 if(out==NULL) { 373 fprintf(stderr, "genccode: unable to open output file %s\n", buffer); 374 exit(U_FILE_ACCESS_ERROR); 375 } 376 377 /* turn dashes or dots in the entry name into underscores */ 378 length=uprv_strlen(entry); 379 for(i=0; i<length; ++i) { 380 if(entry[i]=='-' || entry[i]=='.') { 381 entry[i]='_'; 382 } 383 } 384 385#if U_PLATFORM == U_PF_OS400 386 /* 387 TODO: Fix this once the compiler implements this feature. Keep in sync with udatamem.c 388 389 This is here because this platform can't currently put 390 const data into the read-only pages of an object or 391 shared library (service program). Only strings are allowed in read-only 392 pages, so we use char * strings to store the data. 393 394 In order to prevent the beginning of the data from ever matching the 395 magic numbers we must still use the initial double. 396 [grhoten 4/24/2003] 397 */ 398 sprintf(buffer, 399 "#ifndef IN_GENERATED_CCODE\n" 400 "#define IN_GENERATED_CCODE\n" 401 "#define U_DISABLE_RENAMING 1\n" 402 "#include \"unicode/umachine.h\"\n" 403 "#endif\n" 404 "U_CDECL_BEGIN\n" 405 "const struct {\n" 406 " double bogus;\n" 407 " const char *bytes; \n" 408 "} %s={ 0.0, \n", 409 entry); 410 T_FileStream_writeLine(out, buffer); 411 412 for(;;) { 413 length=T_FileStream_read(in, buffer, sizeof(buffer)); 414 if(length==0) { 415 break; 416 } 417 for(i=0; i<length; ++i) { 418 column = write8str(out, (uint8_t)buffer[i], column); 419 } 420 } 421 422 T_FileStream_writeLine(out, "\"\n};\nU_CDECL_END\n"); 423#else 424 /* Function renaming shouldn't be done in data */ 425 sprintf(buffer, 426 "#ifndef IN_GENERATED_CCODE\n" 427 "#define IN_GENERATED_CCODE\n" 428 "#define U_DISABLE_RENAMING 1\n" 429 "#include \"unicode/umachine.h\"\n" 430 "#endif\n" 431 "U_CDECL_BEGIN\n" 432 "const struct {\n" 433 " double bogus;\n" 434 " uint8_t bytes[%ld]; \n" 435 "} %s={ 0.0, {\n", 436 (long)T_FileStream_size(in), entry); 437 T_FileStream_writeLine(out, buffer); 438 439 for(;;) { 440 length=T_FileStream_read(in, buffer, sizeof(buffer)); 441 if(length==0) { 442 break; 443 } 444 for(i=0; i<length; ++i) { 445 column = write8(out, (uint8_t)buffer[i], column); 446 } 447 } 448 449 T_FileStream_writeLine(out, "\n}\n};\nU_CDECL_END\n"); 450#endif 451 452 if(T_FileStream_error(in)) { 453 fprintf(stderr, "genccode: file read error while generating from file %s\n", filename); 454 exit(U_FILE_ACCESS_ERROR); 455 } 456 457 if(T_FileStream_error(out)) { 458 fprintf(stderr, "genccode: file write error while generating from file %s\n", filename); 459 exit(U_FILE_ACCESS_ERROR); 460 } 461 462 T_FileStream_close(out); 463 T_FileStream_close(in); 464} 465 466static uint32_t 467write32(FileStream *out, uint32_t bitField, uint32_t column) { 468 int32_t i; 469 char bitFieldStr[64]; /* This is more bits than needed for a 32-bit number */ 470 char *s = bitFieldStr; 471 uint8_t *ptrIdx = (uint8_t *)&bitField; 472 static const char hexToStr[16] = { 473 '0','1','2','3', 474 '4','5','6','7', 475 '8','9','A','B', 476 'C','D','E','F' 477 }; 478 479 /* write the value, possibly with comma and newline */ 480 if(column==MAX_COLUMN) { 481 /* first byte */ 482 column=1; 483 } else if(column<32) { 484 *(s++)=','; 485 ++column; 486 } else { 487 *(s++)='\n'; 488 uprv_strcpy(s, assemblyHeader[assemblyHeaderIndex].beginLine); 489 s+=uprv_strlen(s); 490 column=1; 491 } 492 493 if (bitField < 10) { 494 /* It's a small number. Don't waste the space for 0x */ 495 *(s++)=hexToStr[bitField]; 496 } 497 else { 498 int seenNonZero = 0; /* This is used to remove leading zeros */ 499 500 if(hexType==HEX_0X) { 501 *(s++)='0'; 502 *(s++)='x'; 503 } else if(hexType==HEX_0H) { 504 *(s++)='0'; 505 } 506 507 /* This creates a 32-bit field */ 508#if U_IS_BIG_ENDIAN 509 for (i = 0; i < sizeof(uint32_t); i++) 510#else 511 for (i = sizeof(uint32_t)-1; i >= 0 ; i--) 512#endif 513 { 514 uint8_t value = ptrIdx[i]; 515 if (value || seenNonZero) { 516 *(s++)=hexToStr[value>>4]; 517 *(s++)=hexToStr[value&0xF]; 518 seenNonZero = 1; 519 } 520 } 521 if(hexType==HEX_0H) { 522 *(s++)='h'; 523 } 524 } 525 526 *(s++)=0; 527 T_FileStream_writeLine(out, bitFieldStr); 528 return column; 529} 530 531static uint32_t 532write8(FileStream *out, uint8_t byte, uint32_t column) { 533 char s[4]; 534 int i=0; 535 536 /* convert the byte value to a string */ 537 if(byte>=100) { 538 s[i++]=(char)('0'+byte/100); 539 byte%=100; 540 } 541 if(i>0 || byte>=10) { 542 s[i++]=(char)('0'+byte/10); 543 byte%=10; 544 } 545 s[i++]=(char)('0'+byte); 546 s[i]=0; 547 548 /* write the value, possibly with comma and newline */ 549 if(column==MAX_COLUMN) { 550 /* first byte */ 551 column=1; 552 } else if(column<16) { 553 T_FileStream_writeLine(out, ","); 554 ++column; 555 } else { 556 T_FileStream_writeLine(out, ",\n"); 557 column=1; 558 } 559 T_FileStream_writeLine(out, s); 560 return column; 561} 562 563#if U_PLATFORM == U_PF_OS400 564static uint32_t 565write8str(FileStream *out, uint8_t byte, uint32_t column) { 566 char s[8]; 567 568 if (byte > 7) 569 sprintf(s, "\\x%X", byte); 570 else 571 sprintf(s, "\\%X", byte); 572 573 /* write the value, possibly with comma and newline */ 574 if(column==MAX_COLUMN) { 575 /* first byte */ 576 column=1; 577 T_FileStream_writeLine(out, "\""); 578 } else if(column<24) { 579 ++column; 580 } else { 581 T_FileStream_writeLine(out, "\"\n\""); 582 column=1; 583 } 584 T_FileStream_writeLine(out, s); 585 return column; 586} 587#endif 588 589static void 590getOutFilename(const char *inFilename, const char *destdir, char *outFilename, char *entryName, const char *newSuffix, const char *optFilename) { 591 const char *basename=findBasename(inFilename), *suffix=uprv_strrchr(basename, '.'); 592 593 /* copy path */ 594 if(destdir!=NULL && *destdir!=0) { 595 do { 596 *outFilename++=*destdir++; 597 } while(*destdir!=0); 598 if(*(outFilename-1)!=U_FILE_SEP_CHAR) { 599 *outFilename++=U_FILE_SEP_CHAR; 600 } 601 inFilename=basename; 602 } else { 603 while(inFilename<basename) { 604 *outFilename++=*inFilename++; 605 } 606 } 607 608 if(suffix==NULL) { 609 /* the filename does not have a suffix */ 610 uprv_strcpy(entryName, inFilename); 611 if(optFilename != NULL) { 612 uprv_strcpy(outFilename, optFilename); 613 } else { 614 uprv_strcpy(outFilename, inFilename); 615 } 616 uprv_strcat(outFilename, newSuffix); 617 } else { 618 char *saveOutFilename = outFilename; 619 /* copy basename */ 620 while(inFilename<suffix) { 621 if(*inFilename=='-') { 622 /* iSeries cannot have '-' in the .o objects. */ 623 *outFilename++=*entryName++='_'; 624 inFilename++; 625 } 626 else { 627 *outFilename++=*entryName++=*inFilename++; 628 } 629 } 630 631 /* replace '.' by '_' */ 632 *outFilename++=*entryName++='_'; 633 ++inFilename; 634 635 /* copy suffix */ 636 while(*inFilename!=0) { 637 *outFilename++=*entryName++=*inFilename++; 638 } 639 640 *entryName=0; 641 642 if(optFilename != NULL) { 643 uprv_strcpy(saveOutFilename, optFilename); 644 uprv_strcat(saveOutFilename, newSuffix); 645 } else { 646 /* add ".c" */ 647 uprv_strcpy(outFilename, newSuffix); 648 } 649 } 650} 651 652#ifdef CAN_GENERATE_OBJECTS 653static void 654getArchitecture(uint16_t *pCPU, uint16_t *pBits, UBool *pIsBigEndian, const char *optMatchArch) { 655 union { 656 char bytes[2048]; 657#ifdef U_ELF 658 Elf32_Ehdr header32; 659 /* Elf32_Ehdr and ELF64_Ehdr are identical for the necessary fields. */ 660#elif U_PLATFORM_HAS_WIN32_API 661 IMAGE_FILE_HEADER header; 662#endif 663 } buffer; 664 665 const char *filename; 666 FileStream *in; 667 int32_t length; 668 669#ifdef U_ELF 670 671#elif U_PLATFORM_HAS_WIN32_API 672 const IMAGE_FILE_HEADER *pHeader; 673#else 674# error "Unknown platform for CAN_GENERATE_OBJECTS." 675#endif 676 677 if(optMatchArch != NULL) { 678 filename=optMatchArch; 679 } else { 680 /* set defaults */ 681#ifdef U_ELF 682 /* set EM_386 because elf.h does not provide better defaults */ 683 *pCPU=EM_386; 684 *pBits=32; 685 *pIsBigEndian=(UBool)(U_IS_BIG_ENDIAN ? ELFDATA2MSB : ELFDATA2LSB); 686#elif U_PLATFORM_HAS_WIN32_API 687/* _M_IA64 should be defined in windows.h */ 688# if defined(_M_IA64) 689 *pCPU=IMAGE_FILE_MACHINE_IA64; 690# elif defined(_M_AMD64) 691 *pCPU=IMAGE_FILE_MACHINE_AMD64; 692# else 693 *pCPU=IMAGE_FILE_MACHINE_I386; 694# endif 695 *pBits= *pCPU==IMAGE_FILE_MACHINE_I386 ? 32 : 64; 696 *pIsBigEndian=FALSE; 697#else 698# error "Unknown platform for CAN_GENERATE_OBJECTS." 699#endif 700 return; 701 } 702 703 in=T_FileStream_open(filename, "rb"); 704 if(in==NULL) { 705 fprintf(stderr, "genccode: unable to open match-arch file %s\n", filename); 706 exit(U_FILE_ACCESS_ERROR); 707 } 708 length=T_FileStream_read(in, buffer.bytes, sizeof(buffer.bytes)); 709 710#ifdef U_ELF 711 if(length<sizeof(Elf32_Ehdr)) { 712 fprintf(stderr, "genccode: match-arch file %s is too short\n", filename); 713 exit(U_UNSUPPORTED_ERROR); 714 } 715 if( 716 buffer.header32.e_ident[0]!=ELFMAG0 || 717 buffer.header32.e_ident[1]!=ELFMAG1 || 718 buffer.header32.e_ident[2]!=ELFMAG2 || 719 buffer.header32.e_ident[3]!=ELFMAG3 || 720 buffer.header32.e_ident[EI_CLASS]<ELFCLASS32 || buffer.header32.e_ident[EI_CLASS]>ELFCLASS64 721 ) { 722 fprintf(stderr, "genccode: match-arch file %s is not an ELF object file, or not supported\n", filename); 723 exit(U_UNSUPPORTED_ERROR); 724 } 725 726 *pBits= buffer.header32.e_ident[EI_CLASS]==ELFCLASS32 ? 32 : 64; /* only 32 or 64: see check above */ 727#ifdef U_ELF64 728 if(*pBits!=32 && *pBits!=64) { 729 fprintf(stderr, "genccode: currently only supports 32-bit and 64-bit ELF format\n"); 730 exit(U_UNSUPPORTED_ERROR); 731 } 732#else 733 if(*pBits!=32) { 734 fprintf(stderr, "genccode: built with elf.h missing 64-bit definitions\n"); 735 exit(U_UNSUPPORTED_ERROR); 736 } 737#endif 738 739 *pIsBigEndian=(UBool)(buffer.header32.e_ident[EI_DATA]==ELFDATA2MSB); 740 if(*pIsBigEndian!=U_IS_BIG_ENDIAN) { 741 fprintf(stderr, "genccode: currently only same-endianness ELF formats are supported\n"); 742 exit(U_UNSUPPORTED_ERROR); 743 } 744 /* TODO: Support byte swapping */ 745 746 *pCPU=buffer.header32.e_machine; 747#elif U_PLATFORM_HAS_WIN32_API 748 if(length<sizeof(IMAGE_FILE_HEADER)) { 749 fprintf(stderr, "genccode: match-arch file %s is too short\n", filename); 750 exit(U_UNSUPPORTED_ERROR); 751 } 752 /* TODO: Use buffer.header. Keep aliasing legal. */ 753 pHeader=(const IMAGE_FILE_HEADER *)buffer.bytes; 754 *pCPU=pHeader->Machine; 755 /* 756 * The number of bits is implicit with the Machine value. 757 * *pBits is ignored in the calling code, so this need not be precise. 758 */ 759 *pBits= *pCPU==IMAGE_FILE_MACHINE_I386 ? 32 : 64; 760 /* Windows always runs on little-endian CPUs. */ 761 *pIsBigEndian=FALSE; 762#else 763# error "Unknown platform for CAN_GENERATE_OBJECTS." 764#endif 765 766 T_FileStream_close(in); 767} 768 769U_CAPI void U_EXPORT2 770writeObjectCode(const char *filename, const char *destdir, const char *optEntryPoint, const char *optMatchArch, const char *optFilename, char *outFilePath) { 771 /* common variables */ 772 char buffer[4096], entry[40]={ 0 }; 773 FileStream *in, *out; 774 const char *newSuffix; 775 int32_t i, entryLength, length, size, entryOffset=0, entryLengthOffset=0; 776 777 uint16_t cpu, bits; 778 UBool makeBigEndian; 779 780 /* platform-specific variables and initialization code */ 781#ifdef U_ELF 782 /* 32-bit Elf file header */ 783 static Elf32_Ehdr header32={ 784 { 785 /* e_ident[] */ 786 ELFMAG0, ELFMAG1, ELFMAG2, ELFMAG3, 787 ELFCLASS32, 788 U_IS_BIG_ENDIAN ? ELFDATA2MSB : ELFDATA2LSB, 789 EV_CURRENT /* EI_VERSION */ 790 }, 791 ET_REL, 792 EM_386, 793 EV_CURRENT, /* e_version */ 794 0, /* e_entry */ 795 0, /* e_phoff */ 796 (Elf32_Off)sizeof(Elf32_Ehdr), /* e_shoff */ 797 0, /* e_flags */ 798 (Elf32_Half)sizeof(Elf32_Ehdr), /* eh_size */ 799 0, /* e_phentsize */ 800 0, /* e_phnum */ 801 (Elf32_Half)sizeof(Elf32_Shdr), /* e_shentsize */ 802 5, /* e_shnum */ 803 2 /* e_shstrndx */ 804 }; 805 806 /* 32-bit Elf section header table */ 807 static Elf32_Shdr sectionHeaders32[5]={ 808 { /* SHN_UNDEF */ 809 0 810 }, 811 { /* .symtab */ 812 1, /* sh_name */ 813 SHT_SYMTAB, 814 0, /* sh_flags */ 815 0, /* sh_addr */ 816 (Elf32_Off)(sizeof(header32)+sizeof(sectionHeaders32)), /* sh_offset */ 817 (Elf32_Word)(2*sizeof(Elf32_Sym)), /* sh_size */ 818 3, /* sh_link=sect hdr index of .strtab */ 819 1, /* sh_info=One greater than the symbol table index of the last 820 * local symbol (with STB_LOCAL). */ 821 4, /* sh_addralign */ 822 (Elf32_Word)(sizeof(Elf32_Sym)) /* sh_entsize */ 823 }, 824 { /* .shstrtab */ 825 9, /* sh_name */ 826 SHT_STRTAB, 827 0, /* sh_flags */ 828 0, /* sh_addr */ 829 (Elf32_Off)(sizeof(header32)+sizeof(sectionHeaders32)+2*sizeof(Elf32_Sym)), /* sh_offset */ 830 40, /* sh_size */ 831 0, /* sh_link */ 832 0, /* sh_info */ 833 1, /* sh_addralign */ 834 0 /* sh_entsize */ 835 }, 836 { /* .strtab */ 837 19, /* sh_name */ 838 SHT_STRTAB, 839 0, /* sh_flags */ 840 0, /* sh_addr */ 841 (Elf32_Off)(sizeof(header32)+sizeof(sectionHeaders32)+2*sizeof(Elf32_Sym)+40), /* sh_offset */ 842 (Elf32_Word)sizeof(entry), /* sh_size */ 843 0, /* sh_link */ 844 0, /* sh_info */ 845 1, /* sh_addralign */ 846 0 /* sh_entsize */ 847 }, 848 { /* .rodata */ 849 27, /* sh_name */ 850 SHT_PROGBITS, 851 SHF_ALLOC, /* sh_flags */ 852 0, /* sh_addr */ 853 (Elf32_Off)(sizeof(header32)+sizeof(sectionHeaders32)+2*sizeof(Elf32_Sym)+40+sizeof(entry)), /* sh_offset */ 854 0, /* sh_size */ 855 0, /* sh_link */ 856 0, /* sh_info */ 857 16, /* sh_addralign */ 858 0 /* sh_entsize */ 859 } 860 }; 861 862 /* symbol table */ 863 static Elf32_Sym symbols32[2]={ 864 { /* STN_UNDEF */ 865 0 866 }, 867 { /* data entry point */ 868 1, /* st_name */ 869 0, /* st_value */ 870 0, /* st_size */ 871 ELF64_ST_INFO(STB_GLOBAL, STT_OBJECT), 872 0, /* st_other */ 873 4 /* st_shndx=index of related section table entry */ 874 } 875 }; 876 877 /* section header string table, with decimal string offsets */ 878 static const char sectionStrings[40]= 879 /* 0 */ "\0" 880 /* 1 */ ".symtab\0" 881 /* 9 */ ".shstrtab\0" 882 /* 19 */ ".strtab\0" 883 /* 27 */ ".rodata\0" 884 /* 35 */ "\0\0\0\0"; /* contains terminating NUL */ 885 /* 40: padded to multiple of 8 bytes */ 886 887 /* 888 * Use entry[] for the string table which will contain only the 889 * entry point name. 890 * entry[0] must be 0 (NUL) 891 * The entry point name can be up to 38 characters long (sizeof(entry)-2). 892 */ 893 894 /* 16-align .rodata in the .o file, just in case */ 895 static const char padding[16]={ 0 }; 896 int32_t paddingSize; 897 898#ifdef U_ELF64 899 /* 64-bit Elf file header */ 900 static Elf64_Ehdr header64={ 901 { 902 /* e_ident[] */ 903 ELFMAG0, ELFMAG1, ELFMAG2, ELFMAG3, 904 ELFCLASS64, 905 U_IS_BIG_ENDIAN ? ELFDATA2MSB : ELFDATA2LSB, 906 EV_CURRENT /* EI_VERSION */ 907 }, 908 ET_REL, 909 EM_X86_64, 910 EV_CURRENT, /* e_version */ 911 0, /* e_entry */ 912 0, /* e_phoff */ 913 (Elf64_Off)sizeof(Elf64_Ehdr), /* e_shoff */ 914 0, /* e_flags */ 915 (Elf64_Half)sizeof(Elf64_Ehdr), /* eh_size */ 916 0, /* e_phentsize */ 917 0, /* e_phnum */ 918 (Elf64_Half)sizeof(Elf64_Shdr), /* e_shentsize */ 919 5, /* e_shnum */ 920 2 /* e_shstrndx */ 921 }; 922 923 /* 64-bit Elf section header table */ 924 static Elf64_Shdr sectionHeaders64[5]={ 925 { /* SHN_UNDEF */ 926 0 927 }, 928 { /* .symtab */ 929 1, /* sh_name */ 930 SHT_SYMTAB, 931 0, /* sh_flags */ 932 0, /* sh_addr */ 933 (Elf64_Off)(sizeof(header64)+sizeof(sectionHeaders64)), /* sh_offset */ 934 (Elf64_Xword)(2*sizeof(Elf64_Sym)), /* sh_size */ 935 3, /* sh_link=sect hdr index of .strtab */ 936 1, /* sh_info=One greater than the symbol table index of the last 937 * local symbol (with STB_LOCAL). */ 938 4, /* sh_addralign */ 939 (Elf64_Xword)(sizeof(Elf64_Sym)) /* sh_entsize */ 940 }, 941 { /* .shstrtab */ 942 9, /* sh_name */ 943 SHT_STRTAB, 944 0, /* sh_flags */ 945 0, /* sh_addr */ 946 (Elf64_Off)(sizeof(header64)+sizeof(sectionHeaders64)+2*sizeof(Elf64_Sym)), /* sh_offset */ 947 40, /* sh_size */ 948 0, /* sh_link */ 949 0, /* sh_info */ 950 1, /* sh_addralign */ 951 0 /* sh_entsize */ 952 }, 953 { /* .strtab */ 954 19, /* sh_name */ 955 SHT_STRTAB, 956 0, /* sh_flags */ 957 0, /* sh_addr */ 958 (Elf64_Off)(sizeof(header64)+sizeof(sectionHeaders64)+2*sizeof(Elf64_Sym)+40), /* sh_offset */ 959 (Elf64_Xword)sizeof(entry), /* sh_size */ 960 0, /* sh_link */ 961 0, /* sh_info */ 962 1, /* sh_addralign */ 963 0 /* sh_entsize */ 964 }, 965 { /* .rodata */ 966 27, /* sh_name */ 967 SHT_PROGBITS, 968 SHF_ALLOC, /* sh_flags */ 969 0, /* sh_addr */ 970 (Elf64_Off)(sizeof(header64)+sizeof(sectionHeaders64)+2*sizeof(Elf64_Sym)+40+sizeof(entry)), /* sh_offset */ 971 0, /* sh_size */ 972 0, /* sh_link */ 973 0, /* sh_info */ 974 16, /* sh_addralign */ 975 0 /* sh_entsize */ 976 } 977 }; 978 979 /* 980 * 64-bit symbol table 981 * careful: different order of items compared with Elf32_sym! 982 */ 983 static Elf64_Sym symbols64[2]={ 984 { /* STN_UNDEF */ 985 0 986 }, 987 { /* data entry point */ 988 1, /* st_name */ 989 ELF64_ST_INFO(STB_GLOBAL, STT_OBJECT), 990 0, /* st_other */ 991 4, /* st_shndx=index of related section table entry */ 992 0, /* st_value */ 993 0 /* st_size */ 994 } 995 }; 996 997#endif /* U_ELF64 */ 998 999 /* entry[] have a leading NUL */ 1000 entryOffset=1; 1001 1002 /* in the common code, count entryLength from after the NUL */ 1003 entryLengthOffset=1; 1004 1005 newSuffix=".o"; 1006 1007#elif U_PLATFORM_HAS_WIN32_API 1008 struct { 1009 IMAGE_FILE_HEADER fileHeader; 1010 IMAGE_SECTION_HEADER sections[2]; 1011 char linkerOptions[100]; 1012 } objHeader; 1013 IMAGE_SYMBOL symbols[1]; 1014 struct { 1015 DWORD sizeofLongNames; 1016 char longNames[100]; 1017 } symbolNames; 1018 1019 /* 1020 * entry sometimes have a leading '_' 1021 * overwritten if entryOffset==0 depending on the target platform 1022 * see check for cpu below 1023 */ 1024 entry[0]='_'; 1025 1026 newSuffix=".obj"; 1027#else 1028# error "Unknown platform for CAN_GENERATE_OBJECTS." 1029#endif 1030 1031 /* deal with options, files and the entry point name */ 1032 getArchitecture(&cpu, &bits, &makeBigEndian, optMatchArch); 1033 printf("genccode: --match-arch cpu=%hu bits=%hu big-endian=%d\n", cpu, bits, makeBigEndian); 1034#if U_PLATFORM_HAS_WIN32_API 1035 if(cpu==IMAGE_FILE_MACHINE_I386) { 1036 entryOffset=1; 1037 } 1038#endif 1039 1040 in=T_FileStream_open(filename, "rb"); 1041 if(in==NULL) { 1042 fprintf(stderr, "genccode: unable to open input file %s\n", filename); 1043 exit(U_FILE_ACCESS_ERROR); 1044 } 1045 size=T_FileStream_size(in); 1046 1047 getOutFilename(filename, destdir, buffer, entry+entryOffset, newSuffix, optFilename); 1048 if (outFilePath != NULL) { 1049 uprv_strcpy(outFilePath, buffer); 1050 } 1051 1052 if(optEntryPoint != NULL) { 1053 uprv_strcpy(entry+entryOffset, optEntryPoint); 1054 uprv_strcat(entry+entryOffset, "_dat"); 1055 } 1056 /* turn dashes in the entry name into underscores */ 1057 entryLength=(int32_t)uprv_strlen(entry+entryLengthOffset); 1058 for(i=0; i<entryLength; ++i) { 1059 if(entry[entryLengthOffset+i]=='-') { 1060 entry[entryLengthOffset+i]='_'; 1061 } 1062 } 1063 1064 /* open the output file */ 1065 out=T_FileStream_open(buffer, "wb"); 1066 if(out==NULL) { 1067 fprintf(stderr, "genccode: unable to open output file %s\n", buffer); 1068 exit(U_FILE_ACCESS_ERROR); 1069 } 1070 1071#ifdef U_ELF 1072 if(bits==32) { 1073 header32.e_ident[EI_DATA]= makeBigEndian ? ELFDATA2MSB : ELFDATA2LSB; 1074 header32.e_machine=cpu; 1075 1076 /* 16-align .rodata in the .o file, just in case */ 1077 paddingSize=sectionHeaders32[4].sh_offset & 0xf; 1078 if(paddingSize!=0) { 1079 paddingSize=0x10-paddingSize; 1080 sectionHeaders32[4].sh_offset+=paddingSize; 1081 } 1082 1083 sectionHeaders32[4].sh_size=(Elf32_Word)size; 1084 1085 symbols32[1].st_size=(Elf32_Word)size; 1086 1087 /* write .o headers */ 1088 T_FileStream_write(out, &header32, (int32_t)sizeof(header32)); 1089 T_FileStream_write(out, sectionHeaders32, (int32_t)sizeof(sectionHeaders32)); 1090 T_FileStream_write(out, symbols32, (int32_t)sizeof(symbols32)); 1091 } else /* bits==64 */ { 1092#ifdef U_ELF64 1093 header64.e_ident[EI_DATA]= makeBigEndian ? ELFDATA2MSB : ELFDATA2LSB; 1094 header64.e_machine=cpu; 1095 1096 /* 16-align .rodata in the .o file, just in case */ 1097 paddingSize=sectionHeaders64[4].sh_offset & 0xf; 1098 if(paddingSize!=0) { 1099 paddingSize=0x10-paddingSize; 1100 sectionHeaders64[4].sh_offset+=paddingSize; 1101 } 1102 1103 sectionHeaders64[4].sh_size=(Elf64_Xword)size; 1104 1105 symbols64[1].st_size=(Elf64_Xword)size; 1106 1107 /* write .o headers */ 1108 T_FileStream_write(out, &header64, (int32_t)sizeof(header64)); 1109 T_FileStream_write(out, sectionHeaders64, (int32_t)sizeof(sectionHeaders64)); 1110 T_FileStream_write(out, symbols64, (int32_t)sizeof(symbols64)); 1111#endif 1112 } 1113 1114 T_FileStream_write(out, sectionStrings, (int32_t)sizeof(sectionStrings)); 1115 T_FileStream_write(out, entry, (int32_t)sizeof(entry)); 1116 if(paddingSize!=0) { 1117 T_FileStream_write(out, padding, paddingSize); 1118 } 1119#elif U_PLATFORM_HAS_WIN32_API 1120 /* populate the .obj headers */ 1121 uprv_memset(&objHeader, 0, sizeof(objHeader)); 1122 uprv_memset(&symbols, 0, sizeof(symbols)); 1123 uprv_memset(&symbolNames, 0, sizeof(symbolNames)); 1124 1125 /* write the linker export directive */ 1126 uprv_strcpy(objHeader.linkerOptions, "-export:"); 1127 length=8; 1128 uprv_strcpy(objHeader.linkerOptions+length, entry); 1129 length+=entryLength; 1130 uprv_strcpy(objHeader.linkerOptions+length, ",data "); 1131 length+=6; 1132 1133 /* set the file header */ 1134 objHeader.fileHeader.Machine=cpu; 1135 objHeader.fileHeader.NumberOfSections=2; 1136 objHeader.fileHeader.TimeDateStamp=(DWORD)time(NULL); 1137 objHeader.fileHeader.PointerToSymbolTable=IMAGE_SIZEOF_FILE_HEADER+2*IMAGE_SIZEOF_SECTION_HEADER+length+size; /* start of symbol table */ 1138 objHeader.fileHeader.NumberOfSymbols=1; 1139 1140 /* set the section for the linker options */ 1141 uprv_strncpy((char *)objHeader.sections[0].Name, ".drectve", 8); 1142 objHeader.sections[0].SizeOfRawData=length; 1143 objHeader.sections[0].PointerToRawData=IMAGE_SIZEOF_FILE_HEADER+2*IMAGE_SIZEOF_SECTION_HEADER; 1144 objHeader.sections[0].Characteristics=IMAGE_SCN_LNK_INFO|IMAGE_SCN_LNK_REMOVE|IMAGE_SCN_ALIGN_1BYTES; 1145 1146 /* set the data section */ 1147 uprv_strncpy((char *)objHeader.sections[1].Name, ".rdata", 6); 1148 objHeader.sections[1].SizeOfRawData=size; 1149 objHeader.sections[1].PointerToRawData=IMAGE_SIZEOF_FILE_HEADER+2*IMAGE_SIZEOF_SECTION_HEADER+length; 1150 objHeader.sections[1].Characteristics=IMAGE_SCN_CNT_INITIALIZED_DATA|IMAGE_SCN_ALIGN_16BYTES|IMAGE_SCN_MEM_READ; 1151 1152 /* set the symbol table */ 1153 if(entryLength<=8) { 1154 uprv_strncpy((char *)symbols[0].N.ShortName, entry, entryLength); 1155 symbolNames.sizeofLongNames=4; 1156 } else { 1157 symbols[0].N.Name.Short=0; 1158 symbols[0].N.Name.Long=4; 1159 symbolNames.sizeofLongNames=4+entryLength+1; 1160 uprv_strcpy(symbolNames.longNames, entry); 1161 } 1162 symbols[0].SectionNumber=2; 1163 symbols[0].StorageClass=IMAGE_SYM_CLASS_EXTERNAL; 1164 1165 /* write the file header and the linker options section */ 1166 T_FileStream_write(out, &objHeader, objHeader.sections[1].PointerToRawData); 1167#else 1168# error "Unknown platform for CAN_GENERATE_OBJECTS." 1169#endif 1170 1171 /* copy the data file into section 2 */ 1172 for(;;) { 1173 length=T_FileStream_read(in, buffer, sizeof(buffer)); 1174 if(length==0) { 1175 break; 1176 } 1177 T_FileStream_write(out, buffer, (int32_t)length); 1178 } 1179 1180#if U_PLATFORM_HAS_WIN32_API 1181 /* write the symbol table */ 1182 T_FileStream_write(out, symbols, IMAGE_SIZEOF_SYMBOL); 1183 T_FileStream_write(out, &symbolNames, symbolNames.sizeofLongNames); 1184#endif 1185 1186 if(T_FileStream_error(in)) { 1187 fprintf(stderr, "genccode: file read error while generating from file %s\n", filename); 1188 exit(U_FILE_ACCESS_ERROR); 1189 } 1190 1191 if(T_FileStream_error(out)) { 1192 fprintf(stderr, "genccode: file write error while generating from file %s\n", filename); 1193 exit(U_FILE_ACCESS_ERROR); 1194 } 1195 1196 T_FileStream_close(out); 1197 T_FileStream_close(in); 1198} 1199#endif 1200