1/******************************************************************************
2 *   Copyright (C) 2009, International Business Machines
3 *   Corporation and others.  All Rights Reserved.
4 *******************************************************************************
5 */
6#include "unicode/utypes.h"
7
8#ifdef U_WINDOWS
9#   define VC_EXTRALEAN
10#   define WIN32_LEAN_AND_MEAN
11#   define NOUSER
12#   define NOSERVICE
13#   define NOIME
14#   define NOMCX
15#include <windows.h>
16#include <time.h>
17#   ifdef __GNUC__
18#       define WINDOWS_WITH_GNUC
19#   endif
20#endif
21
22#ifdef U_LINUX
23#   define U_ELF
24#endif
25
26#ifdef U_ELF
27#   include <elf.h>
28#   if defined(ELFCLASS64)
29#       define U_ELF64
30#   endif
31    /* Old elf.h headers may not have EM_X86_64, or have EM_X8664 instead. */
32#   ifndef EM_X86_64
33#       define EM_X86_64 62
34#   endif
35#   define ICU_ENTRY_OFFSET 0
36#endif
37
38#include <stdio.h>
39#include <stdlib.h>
40#include "unicode/putil.h"
41#include "cmemory.h"
42#include "cstring.h"
43#include "filestrm.h"
44#include "toolutil.h"
45#include "unicode/uclean.h"
46#include "uoptions.h"
47#include "pkg_genc.h"
48
49#define MAX_COLUMN ((uint32_t)(0xFFFFFFFFU))
50
51#define HEX_0X 0 /*  0x1234 */
52#define HEX_0H 1 /*  01234h */
53
54#if defined(U_WINDOWS) || defined(U_ELF)
55#define CAN_GENERATE_OBJECTS
56#endif
57
58/* prototypes --------------------------------------------------------------- */
/*
 * Derives the output file path (written to outFilename) and the entry point
 * name (written to entryName) from the input file name and the options.
 */
static void
getOutFilename(const char *inFilename, const char *destdir, char *outFilename, char *entryName, const char *newSuffix, const char *optFilename);

/* Writes one byte as a decimal number; returns the updated column counter. */
static uint32_t
write8(FileStream *out, uint8_t byte, uint32_t column);

/* Writes one 32-bit value for the assembly output; returns the updated column counter. */
static uint32_t
write32(FileStream *out, uint32_t byte, uint32_t column);

#ifdef OS400
/* Writes one byte as an escape sequence inside a C string literal; returns the updated column counter. */
static uint32_t
write8str(FileStream *out, uint8_t byte, uint32_t column);
#endif
72/* -------------------------------------------------------------------------- */
73
74/*
75Creating Template Files for New Platforms
76
77Let the cc compiler help you get started.
78Compile this program
79    const unsigned int x[5] = {1, 2, 0xdeadbeef, 0xffffffff, 16};
80with the -S option to produce assembly output.
81
82For example, this will generate array.s:
83gcc -S array.c
84
85This will produce a .s file that may look like this:
86
87    .file   "array.c"
88    .version        "01.01"
89gcc2_compiled.:
90    .globl x
91    .section        .rodata
92    .align 4
93    .type    x,@object
94    .size    x,20
95x:
96    .long   1
97    .long   2
98    .long   -559038737
99    .long   -1
100    .long   16
101    .ident  "GCC: (GNU) 2.96 20000731 (Red Hat Linux 7.1 2.96-85)"
102
103which gives a starting point that will compile, and can be transformed
104to become the template, generally with some consulting of as docs and
105some experimentation.
106
107If you want ICU to automatically use this assembly, you should
108specify "GENCCODE_ASSEMBLY=-a name" in the specific config/mh-* file,
109where the name is the compiler or platform that you used in this
110assemblyHeader data structure.
111*/
/*
 * Table of the supported assembly output templates.
 * checkAssemblyHeaderName() selects one entry by name; writeAssemblyCode()
 * and write32() then use the selected entry.
 */
static const struct AssemblyType {
    const char *name;      /* template name, compared against the requested assembly type */
    const char *header;    /* sprintf() format written once at the top; each %s receives the entry point name */
    const char *beginLine; /* directive that starts every line of data values */
    const char *footer;    /* sprintf() format written once after the data; each %s receives the entry point name */
    int8_t      hexType; /* HEX_0X or HEX_0H */
} assemblyHeader[] = {
    {"gcc",
        ".globl %s\n"
        "\t.section .note.GNU-stack,\"\",@progbits\n"
        "\t.section .rodata\n"
        "\t.align 8\n" /* Either align 8 bytes or 2^8 (256) bytes. 8 bytes is needed. */
        "\t.type %s,@object\n"
        "%s:\n\n",

        ".long ","",HEX_0X
    },
    {"gcc-darwin",
        /*"\t.section __TEXT,__text,regular,pure_instructions\n"
        "\t.section __TEXT,__picsymbolstub1,symbol_stubs,pure_instructions,32\n"*/
        ".globl _%s\n"
        "\t.data\n"
        "\t.const\n"
        "\t.align 4\n"  /* 1<<4 = 16 */
        "_%s:\n\n",

        ".long ","",HEX_0X
    },
    {"gcc-cygwin",
        ".globl _%s\n"
        "\t.section .rodata\n"
        "\t.align 8\n" /* Either align 8 bytes or 2^8 (256) bytes. 8 bytes is needed. */
        "_%s:\n\n",

        ".long ","",HEX_0X
    },
    {"sun",
        "\t.section \".rodata\"\n"
        "\t.align   8\n"
        ".globl     %s\n"
        "%s:\n",

        ".word ","",HEX_0X
    },
    {"sun-x86",
        "Drodata.rodata:\n"
        "\t.type   Drodata.rodata,@object\n"
        "\t.size   Drodata.rodata,0\n"
        "\t.globl  %s\n"
        "\t.align  8\n"
        "%s:\n",

        ".4byte ","",HEX_0X
    },
    {"xlc",
        ".globl %s{RO}\n"
        "\t.toc\n"
        "%s:\n"
        "\t.csect %s{RO}, 4\n",

        ".long ","",HEX_0X
    },
    {"aCC-ia64",
        "\t.file   \"%s.s\"\n"
        "\t.type   %s,@object\n"
        "\t.global %s\n"
        "\t.secalias .abe$0.rodata, \".rodata\"\n"
        "\t.section .abe$0.rodata = \"a\", \"progbits\"\n"
        "\t.align  16\n"
        "%s::\t",

        "data4 ","",HEX_0X
    },
    {"aCC-parisc",
        "\t.SPACE  $TEXT$\n"
        "\t.SUBSPA $LIT$\n"
        "%s\n"
        "\t.EXPORT %s\n"
        "\t.ALIGN  16\n",

        ".WORD ","",HEX_0X
    },
    /* The only entry with a non-empty footer and with HEX_0H number formatting. */
    { "masm",
      "\tTITLE %s\n"
      "; generated by genccode\n"
      ".386\n"
      ".model flat\n"
      "\tPUBLIC _%s\n"
      "ICUDATA_%s\tSEGMENT READONLY PARA PUBLIC FLAT 'DATA'\n"
      "\tALIGN 16\n"
      "_%s\tLABEL DWORD\n",
      "\tDWORD ","\nICUDATA_%s\tENDS\n\tEND\n",HEX_0H
    }
};
206
/* Index into assemblyHeader[] of the selected template; -1 while none is selected. */
static int32_t assemblyHeaderIndex = -1;
/* Number format (HEX_0X or HEX_0H) of the selected template; used by write32(). */
static int32_t hexType = HEX_0X;
209
210U_CAPI UBool U_EXPORT2
211checkAssemblyHeaderName(const char* optAssembly) {
212    int32_t idx;
213    assemblyHeaderIndex = -1;
214    for (idx = 0; idx < (int32_t)(sizeof(assemblyHeader)/sizeof(assemblyHeader[0])); idx++) {
215        if (uprv_strcmp(optAssembly, assemblyHeader[idx].name) == 0) {
216            assemblyHeaderIndex = idx;
217            hexType = assemblyHeader[idx].hexType; /* set the hex type */
218            return TRUE;
219        }
220    }
221
222    return FALSE;
223}
224
225
226U_CAPI void U_EXPORT2
227printAssemblyHeadersToStdErr(void) {
228    int32_t idx;
229    fprintf(stderr, "%s", assemblyHeader[0].name);
230    for (idx = 1; idx < (int32_t)(sizeof(assemblyHeader)/sizeof(assemblyHeader[0])); idx++) {
231        fprintf(stderr, ", %s", assemblyHeader[idx].name);
232    }
233    fprintf(stderr,
234        ")\n");
235}
236
237U_CAPI void U_EXPORT2
238writeAssemblyCode(const char *filename, const char *destdir, const char *optEntryPoint, const char *optFilename, char *outFilePath) {
239    uint32_t column = MAX_COLUMN;
240    char entry[64];
241    uint32_t buffer[1024];
242    char *bufferStr = (char *)buffer;
243    FileStream *in, *out;
244    size_t i, length;
245
246    in=T_FileStream_open(filename, "rb");
247    if(in==NULL) {
248        fprintf(stderr, "genccode: unable to open input file %s\n", filename);
249        exit(U_FILE_ACCESS_ERROR);
250    }
251
252    getOutFilename(filename, destdir, bufferStr, entry, ".s", optFilename);
253    out=T_FileStream_open(bufferStr, "w");
254    if(out==NULL) {
255        fprintf(stderr, "genccode: unable to open output file %s\n", bufferStr);
256        exit(U_FILE_ACCESS_ERROR);
257    }
258
259    if (outFilePath != NULL) {
260        uprv_strcpy(outFilePath, bufferStr);
261    }
262
263#ifdef WINDOWS_WITH_GNUC
264    /* Need to fix the file seperator character when using MinGW. */
265    swapFileSepChar(outFilePath, U_FILE_SEP_CHAR, '/');
266#endif
267
268    if(optEntryPoint != NULL) {
269        uprv_strcpy(entry, optEntryPoint);
270        uprv_strcat(entry, "_dat");
271    }
272
273    /* turn dashes or dots in the entry name into underscores */
274    length=uprv_strlen(entry);
275    for(i=0; i<length; ++i) {
276        if(entry[i]=='-' || entry[i]=='.') {
277            entry[i]='_';
278        }
279    }
280
281    sprintf(bufferStr, assemblyHeader[assemblyHeaderIndex].header,
282        entry, entry, entry, entry,
283        entry, entry, entry, entry);
284    T_FileStream_writeLine(out, bufferStr);
285    T_FileStream_writeLine(out, assemblyHeader[assemblyHeaderIndex].beginLine);
286
287    for(;;) {
288        length=T_FileStream_read(in, buffer, sizeof(buffer));
289        if(length==0) {
290            break;
291        }
292        if (length != sizeof(buffer)) {
293            /* pad with extra 0's when at the end of the file */
294            for(i=0; i < (length % sizeof(uint32_t)); ++i) {
295                buffer[length+i] = 0;
296            }
297        }
298        for(i=0; i<(length/sizeof(buffer[0])); i++) {
299            column = write32(out, buffer[i], column);
300        }
301    }
302
303    T_FileStream_writeLine(out, "\n");
304
305    sprintf(bufferStr, assemblyHeader[assemblyHeaderIndex].footer,
306        entry, entry, entry, entry,
307        entry, entry, entry, entry);
308    T_FileStream_writeLine(out, bufferStr);
309
310    if(T_FileStream_error(in)) {
311        fprintf(stderr, "genccode: file read error while generating from file %s\n", filename);
312        exit(U_FILE_ACCESS_ERROR);
313    }
314
315    if(T_FileStream_error(out)) {
316        fprintf(stderr, "genccode: file write error while generating from file %s\n", filename);
317        exit(U_FILE_ACCESS_ERROR);
318    }
319
320    T_FileStream_close(out);
321    T_FileStream_close(in);
322}
323
324U_CAPI void U_EXPORT2
325writeCCode(const char *filename, const char *destdir, const char *optName, const char *optFilename, char *outFilePath) {
326    uint32_t column = MAX_COLUMN;
327    char buffer[4096], entry[64];
328    FileStream *in, *out;
329    size_t i, length;
330
331    in=T_FileStream_open(filename, "rb");
332    if(in==NULL) {
333        fprintf(stderr, "genccode: unable to open input file %s\n", filename);
334        exit(U_FILE_ACCESS_ERROR);
335    }
336
337    if(optName != NULL) { /* prepend  'icudt28_' */
338      strcpy(entry, optName);
339      strcat(entry, "_");
340    } else {
341      entry[0] = 0;
342    }
343
344    getOutFilename(filename, destdir, buffer, entry+uprv_strlen(entry), ".c", optFilename);
345    if (outFilePath != NULL) {
346        uprv_strcpy(outFilePath, buffer);
347    }
348    out=T_FileStream_open(buffer, "w");
349    if(out==NULL) {
350        fprintf(stderr, "genccode: unable to open output file %s\n", buffer);
351        exit(U_FILE_ACCESS_ERROR);
352    }
353
354    /* turn dashes or dots in the entry name into underscores */
355    length=uprv_strlen(entry);
356    for(i=0; i<length; ++i) {
357        if(entry[i]=='-' || entry[i]=='.') {
358            entry[i]='_';
359        }
360    }
361
362#ifdef OS400
363    /*
364    TODO: Fix this once the compiler implements this feature. Keep in sync with udatamem.c
365
366    This is here because this platform can't currently put
367    const data into the read-only pages of an object or
368    shared library (service program). Only strings are allowed in read-only
369    pages, so we use char * strings to store the data.
370
371    In order to prevent the beginning of the data from ever matching the
372    magic numbers we must still use the initial double.
373    [grhoten 4/24/2003]
374    */
375    sprintf(buffer,
376        "#define U_DISABLE_RENAMING 1\n"
377        "#include \"unicode/umachine.h\"\n"
378        "U_CDECL_BEGIN\n"
379        "const struct {\n"
380        "    double bogus;\n"
381        "    const char *bytes; \n"
382        "} %s={ 0.0, \n",
383        entry);
384    T_FileStream_writeLine(out, buffer);
385
386    for(;;) {
387        length=T_FileStream_read(in, buffer, sizeof(buffer));
388        if(length==0) {
389            break;
390        }
391        for(i=0; i<length; ++i) {
392            column = write8str(out, (uint8_t)buffer[i], column);
393        }
394    }
395
396    T_FileStream_writeLine(out, "\"\n};\nU_CDECL_END\n");
397#else
398    /* Function renaming shouldn't be done in data */
399    sprintf(buffer,
400        "#define U_DISABLE_RENAMING 1\n"
401        "#include \"unicode/umachine.h\"\n"
402        "U_CDECL_BEGIN\n"
403        "const struct {\n"
404        "    double bogus;\n"
405        "    uint8_t bytes[%ld]; \n"
406        "} %s={ 0.0, {\n",
407        (long)T_FileStream_size(in), entry);
408    T_FileStream_writeLine(out, buffer);
409
410    for(;;) {
411        length=T_FileStream_read(in, buffer, sizeof(buffer));
412        if(length==0) {
413            break;
414        }
415        for(i=0; i<length; ++i) {
416            column = write8(out, (uint8_t)buffer[i], column);
417        }
418    }
419
420    T_FileStream_writeLine(out, "\n}\n};\nU_CDECL_END\n");
421#endif
422
423    if(T_FileStream_error(in)) {
424        fprintf(stderr, "genccode: file read error while generating from file %s\n", filename);
425        exit(U_FILE_ACCESS_ERROR);
426    }
427
428    if(T_FileStream_error(out)) {
429        fprintf(stderr, "genccode: file write error while generating from file %s\n", filename);
430        exit(U_FILE_ACCESS_ERROR);
431    }
432
433    T_FileStream_close(out);
434    T_FileStream_close(in);
435}
436
437static uint32_t
438write32(FileStream *out, uint32_t bitField, uint32_t column) {
439    int32_t i;
440    char bitFieldStr[64]; /* This is more bits than needed for a 32-bit number */
441    char *s = bitFieldStr;
442    uint8_t *ptrIdx = (uint8_t *)&bitField;
443    static const char hexToStr[16] = {
444        '0','1','2','3',
445        '4','5','6','7',
446        '8','9','A','B',
447        'C','D','E','F'
448    };
449
450    /* write the value, possibly with comma and newline */
451    if(column==MAX_COLUMN) {
452        /* first byte */
453        column=1;
454    } else if(column<32) {
455        *(s++)=',';
456        ++column;
457    } else {
458        *(s++)='\n';
459        uprv_strcpy(s, assemblyHeader[assemblyHeaderIndex].beginLine);
460        s+=uprv_strlen(s);
461        column=1;
462    }
463
464    if (bitField < 10) {
465        /* It's a small number. Don't waste the space for 0x */
466        *(s++)=hexToStr[bitField];
467    }
468    else {
469        int seenNonZero = 0; /* This is used to remove leading zeros */
470
471        if(hexType==HEX_0X) {
472         *(s++)='0';
473         *(s++)='x';
474        } else if(hexType==HEX_0H) {
475         *(s++)='0';
476        }
477
478        /* This creates a 32-bit field */
479#if U_IS_BIG_ENDIAN
480        for (i = 0; i < sizeof(uint32_t); i++)
481#else
482        for (i = sizeof(uint32_t)-1; i >= 0 ; i--)
483#endif
484        {
485            uint8_t value = ptrIdx[i];
486            if (value || seenNonZero) {
487                *(s++)=hexToStr[value>>4];
488                *(s++)=hexToStr[value&0xF];
489                seenNonZero = 1;
490            }
491        }
492        if(hexType==HEX_0H) {
493         *(s++)='h';
494        }
495    }
496
497    *(s++)=0;
498    T_FileStream_writeLine(out, bitFieldStr);
499    return column;
500}
501
502static uint32_t
503write8(FileStream *out, uint8_t byte, uint32_t column) {
504    char s[4];
505    int i=0;
506
507    /* convert the byte value to a string */
508    if(byte>=100) {
509        s[i++]=(char)('0'+byte/100);
510        byte%=100;
511    }
512    if(i>0 || byte>=10) {
513        s[i++]=(char)('0'+byte/10);
514        byte%=10;
515    }
516    s[i++]=(char)('0'+byte);
517    s[i]=0;
518
519    /* write the value, possibly with comma and newline */
520    if(column==MAX_COLUMN) {
521        /* first byte */
522        column=1;
523    } else if(column<16) {
524        T_FileStream_writeLine(out, ",");
525        ++column;
526    } else {
527        T_FileStream_writeLine(out, ",\n");
528        column=1;
529    }
530    T_FileStream_writeLine(out, s);
531    return column;
532}
533
#ifdef OS400
/*
 * Writes one byte as an escape sequence inside a C string literal and
 * returns the new column.
 *
 * Bytes above 7 are written as \x followed by upper-case hex digits, bytes
 * 0..7 as a backslash followed by the single digit.
 * column is MAX_COLUMN for the very first byte, which opens the literal.
 * After 24 bytes the current literal is closed and a new one is opened on
 * the next line.
 */
static uint32_t
write8str(FileStream *out, uint8_t byte, uint32_t column) {
    char escape[8];

    sprintf(escape, byte > 7 ? "\\x%X" : "\\%X", byte);

    if(column==MAX_COLUMN) {
        /* first byte: open the string literal */
        T_FileStream_writeLine(out, "\"");
        column=1;
    } else if(column>=24) {
        /* line is full: close this literal and continue with a new one */
        T_FileStream_writeLine(out, "\"\n\"");
        column=1;
    } else {
        ++column;
    }

    T_FileStream_writeLine(out, escape);
    return column;
}
#endif
559
560static void
561getOutFilename(const char *inFilename, const char *destdir, char *outFilename, char *entryName, const char *newSuffix, const char *optFilename) {
562    const char *basename=findBasename(inFilename), *suffix=uprv_strrchr(basename, '.');
563
564    /* copy path */
565    if(destdir!=NULL && *destdir!=0) {
566        do {
567            *outFilename++=*destdir++;
568        } while(*destdir!=0);
569        if(*(outFilename-1)!=U_FILE_SEP_CHAR) {
570            *outFilename++=U_FILE_SEP_CHAR;
571        }
572        inFilename=basename;
573    } else {
574        while(inFilename<basename) {
575            *outFilename++=*inFilename++;
576        }
577    }
578
579    if(suffix==NULL) {
580        /* the filename does not have a suffix */
581        uprv_strcpy(entryName, inFilename);
582        if(optFilename != NULL) {
583          uprv_strcpy(outFilename, optFilename);
584        } else {
585          uprv_strcpy(outFilename, inFilename);
586        }
587        uprv_strcat(outFilename, newSuffix);
588    } else {
589        char *saveOutFilename = outFilename;
590        /* copy basename */
591        while(inFilename<suffix) {
592            if(*inFilename=='-') {
593                /* iSeries cannot have '-' in the .o objects. */
594                *outFilename++=*entryName++='_';
595                inFilename++;
596            }
597            else {
598                *outFilename++=*entryName++=*inFilename++;
599            }
600        }
601
602        /* replace '.' by '_' */
603        *outFilename++=*entryName++='_';
604        ++inFilename;
605
606        /* copy suffix */
607        while(*inFilename!=0) {
608            *outFilename++=*entryName++=*inFilename++;
609        }
610
611        *entryName=0;
612
613        if(optFilename != NULL) {
614            uprv_strcpy(saveOutFilename, optFilename);
615            uprv_strcat(saveOutFilename, newSuffix);
616        } else {
617            /* add ".c" */
618            uprv_strcpy(outFilename, newSuffix);
619        }
620    }
621}
622
623#ifdef CAN_GENERATE_OBJECTS
624static void
625getArchitecture(uint16_t *pCPU, uint16_t *pBits, UBool *pIsBigEndian, const char *optMatchArch) {
626    int64_t buffer[256];
627    const char *filename;
628    FileStream *in;
629    int32_t length;
630
631#ifdef U_ELF
632    /* Pointer to ELF header. Elf32_Ehdr and ELF64_Ehdr are identical for the necessary fields. */
633    const Elf32_Ehdr *pHeader32;
634#elif defined(U_WINDOWS)
635    const IMAGE_FILE_HEADER *pHeader;
636#else
637#   error "Unknown platform for CAN_GENERATE_OBJECTS."
638#endif
639
640    if(optMatchArch != NULL) {
641        filename=optMatchArch;
642    } else {
643        /* set defaults */
644#ifdef U_ELF
645        /* set EM_386 because elf.h does not provide better defaults */
646        *pCPU=EM_386;
647        *pBits=32;
648        *pIsBigEndian=(UBool)(U_IS_BIG_ENDIAN ? ELFDATA2MSB : ELFDATA2LSB);
649#elif defined(U_WINDOWS)
650/* _M_IA64 should be defined in windows.h */
651#   if defined(_M_IA64)
652        *pCPU=IMAGE_FILE_MACHINE_IA64;
653#   elif defined(_M_AMD64)
654        *pCPU=IMAGE_FILE_MACHINE_AMD64;
655#   else
656        *pCPU=IMAGE_FILE_MACHINE_I386;
657#   endif
658        *pBits= *pCPU==IMAGE_FILE_MACHINE_I386 ? 32 : 64;
659        *pIsBigEndian=FALSE;
660#else
661#   error "Unknown platform for CAN_GENERATE_OBJECTS."
662#endif
663        return;
664    }
665
666    in=T_FileStream_open(filename, "rb");
667    if(in==NULL) {
668        fprintf(stderr, "genccode: unable to open match-arch file %s\n", filename);
669        exit(U_FILE_ACCESS_ERROR);
670    }
671    length=T_FileStream_read(in, buffer, sizeof(buffer));
672
673#ifdef U_ELF
674    if(length<sizeof(Elf32_Ehdr)) {
675        fprintf(stderr, "genccode: match-arch file %s is too short\n", filename);
676        exit(U_UNSUPPORTED_ERROR);
677    }
678    pHeader32=(const Elf32_Ehdr *)buffer;
679    if(
680        pHeader32->e_ident[0]!=ELFMAG0 ||
681        pHeader32->e_ident[1]!=ELFMAG1 ||
682        pHeader32->e_ident[2]!=ELFMAG2 ||
683        pHeader32->e_ident[3]!=ELFMAG3 ||
684        pHeader32->e_ident[EI_CLASS]<ELFCLASS32 || pHeader32->e_ident[EI_CLASS]>ELFCLASS64
685    ) {
686        fprintf(stderr, "genccode: match-arch file %s is not an ELF object file, or not supported\n", filename);
687        exit(U_UNSUPPORTED_ERROR);
688    }
689
690    *pBits= pHeader32->e_ident[EI_CLASS]==ELFCLASS32 ? 32 : 64; /* only 32 or 64: see check above */
691#ifdef U_ELF64
692    if(*pBits!=32 && *pBits!=64) {
693        fprintf(stderr, "genccode: currently only supports 32-bit and 64-bit ELF format\n");
694        exit(U_UNSUPPORTED_ERROR);
695    }
696#else
697    if(*pBits!=32) {
698        fprintf(stderr, "genccode: built with elf.h missing 64-bit definitions\n");
699        exit(U_UNSUPPORTED_ERROR);
700    }
701#endif
702
703    *pIsBigEndian=(UBool)(pHeader32->e_ident[EI_DATA]==ELFDATA2MSB);
704    if(*pIsBigEndian!=U_IS_BIG_ENDIAN) {
705        fprintf(stderr, "genccode: currently only same-endianness ELF formats are supported\n");
706        exit(U_UNSUPPORTED_ERROR);
707    }
708    /* TODO: Support byte swapping */
709
710    *pCPU=pHeader32->e_machine;
711#elif defined(U_WINDOWS)
712    if(length<sizeof(IMAGE_FILE_HEADER)) {
713        fprintf(stderr, "genccode: match-arch file %s is too short\n", filename);
714        exit(U_UNSUPPORTED_ERROR);
715    }
716    pHeader=(const IMAGE_FILE_HEADER *)buffer;
717    *pCPU=pHeader->Machine;
718    /*
719     * The number of bits is implicit with the Machine value.
720     * *pBits is ignored in the calling code, so this need not be precise.
721     */
722    *pBits= *pCPU==IMAGE_FILE_MACHINE_I386 ? 32 : 64;
723    /* Windows always runs on little-endian CPUs. */
724    *pIsBigEndian=FALSE;
725#else
726#   error "Unknown platform for CAN_GENERATE_OBJECTS."
727#endif
728
729    T_FileStream_close(in);
730}
731
732U_CAPI void U_EXPORT2
733writeObjectCode(const char *filename, const char *destdir, const char *optEntryPoint, const char *optMatchArch, const char *optFilename, char *outFilePath) {
734    /* common variables */
735    char buffer[4096], entry[40]={ 0 };
736    FileStream *in, *out;
737    const char *newSuffix;
738    int32_t i, entryLength, length, size, entryOffset=0, entryLengthOffset=0;
739
740    uint16_t cpu, bits;
741    UBool makeBigEndian;
742
743    /* platform-specific variables and initialization code */
744#ifdef U_ELF
745    /* 32-bit Elf file header */
746    static Elf32_Ehdr header32={
747        {
748            /* e_ident[] */
749            ELFMAG0, ELFMAG1, ELFMAG2, ELFMAG3,
750            ELFCLASS32,
751            U_IS_BIG_ENDIAN ? ELFDATA2MSB : ELFDATA2LSB,
752            EV_CURRENT /* EI_VERSION */
753        },
754        ET_REL,
755        EM_386,
756        EV_CURRENT, /* e_version */
757        0, /* e_entry */
758        0, /* e_phoff */
759        (Elf32_Off)sizeof(Elf32_Ehdr), /* e_shoff */
760        0, /* e_flags */
761        (Elf32_Half)sizeof(Elf32_Ehdr), /* eh_size */
762        0, /* e_phentsize */
763        0, /* e_phnum */
764        (Elf32_Half)sizeof(Elf32_Shdr), /* e_shentsize */
765        5, /* e_shnum */
766        2 /* e_shstrndx */
767    };
768
769    /* 32-bit Elf section header table */
770    static Elf32_Shdr sectionHeaders32[5]={
771        { /* SHN_UNDEF */
772            0
773        },
774        { /* .symtab */
775            1, /* sh_name */
776            SHT_SYMTAB,
777            0, /* sh_flags */
778            0, /* sh_addr */
779            (Elf32_Off)(sizeof(header32)+sizeof(sectionHeaders32)), /* sh_offset */
780            (Elf32_Word)(2*sizeof(Elf32_Sym)), /* sh_size */
781            3, /* sh_link=sect hdr index of .strtab */
782            1, /* sh_info=One greater than the symbol table index of the last
783                * local symbol (with STB_LOCAL). */
784            4, /* sh_addralign */
785            (Elf32_Word)(sizeof(Elf32_Sym)) /* sh_entsize */
786        },
787        { /* .shstrtab */
788            9, /* sh_name */
789            SHT_STRTAB,
790            0, /* sh_flags */
791            0, /* sh_addr */
792            (Elf32_Off)(sizeof(header32)+sizeof(sectionHeaders32)+2*sizeof(Elf32_Sym)), /* sh_offset */
793            40, /* sh_size */
794            0, /* sh_link */
795            0, /* sh_info */
796            1, /* sh_addralign */
797            0 /* sh_entsize */
798        },
799        { /* .strtab */
800            19, /* sh_name */
801            SHT_STRTAB,
802            0, /* sh_flags */
803            0, /* sh_addr */
804            (Elf32_Off)(sizeof(header32)+sizeof(sectionHeaders32)+2*sizeof(Elf32_Sym)+40), /* sh_offset */
805            (Elf32_Word)sizeof(entry), /* sh_size */
806            0, /* sh_link */
807            0, /* sh_info */
808            1, /* sh_addralign */
809            0 /* sh_entsize */
810        },
811        { /* .rodata */
812            27, /* sh_name */
813            SHT_PROGBITS,
814            SHF_ALLOC, /* sh_flags */
815            0, /* sh_addr */
816            (Elf32_Off)(sizeof(header32)+sizeof(sectionHeaders32)+2*sizeof(Elf32_Sym)+40+sizeof(entry)), /* sh_offset */
817            0, /* sh_size */
818            0, /* sh_link */
819            0, /* sh_info */
820            16, /* sh_addralign */
821            0 /* sh_entsize */
822        }
823    };
824
825    /* symbol table */
826    static Elf32_Sym symbols32[2]={
827        { /* STN_UNDEF */
828            0
829        },
830        { /* data entry point */
831            1, /* st_name */
832            0, /* st_value */
833            0, /* st_size */
834            ELF64_ST_INFO(STB_GLOBAL, STT_OBJECT),
835            0, /* st_other */
836            4 /* st_shndx=index of related section table entry */
837        }
838    };
839
840    /* section header string table, with decimal string offsets */
841    static const char sectionStrings[40]=
842        /*  0 */ "\0"
843        /*  1 */ ".symtab\0"
844        /*  9 */ ".shstrtab\0"
845        /* 19 */ ".strtab\0"
846        /* 27 */ ".rodata\0"
847        /* 35 */ "\0\0\0\0"; /* contains terminating NUL */
848        /* 40: padded to multiple of 8 bytes */
849
850    /*
851     * Use entry[] for the string table which will contain only the
852     * entry point name.
853     * entry[0] must be 0 (NUL)
854     * The entry point name can be up to 38 characters long (sizeof(entry)-2).
855     */
856
857    /* 16-align .rodata in the .o file, just in case */
858    static const char padding[16]={ 0 };
859    int32_t paddingSize;
860
861#ifdef U_ELF64
862    /* 64-bit Elf file header */
863    static Elf64_Ehdr header64={
864        {
865            /* e_ident[] */
866            ELFMAG0, ELFMAG1, ELFMAG2, ELFMAG3,
867            ELFCLASS64,
868            U_IS_BIG_ENDIAN ? ELFDATA2MSB : ELFDATA2LSB,
869            EV_CURRENT /* EI_VERSION */
870        },
871        ET_REL,
872        EM_X86_64,
873        EV_CURRENT, /* e_version */
874        0, /* e_entry */
875        0, /* e_phoff */
876        (Elf64_Off)sizeof(Elf64_Ehdr), /* e_shoff */
877        0, /* e_flags */
878        (Elf64_Half)sizeof(Elf64_Ehdr), /* eh_size */
879        0, /* e_phentsize */
880        0, /* e_phnum */
881        (Elf64_Half)sizeof(Elf64_Shdr), /* e_shentsize */
882        5, /* e_shnum */
883        2 /* e_shstrndx */
884    };
885
886    /* 64-bit Elf section header table */
887    static Elf64_Shdr sectionHeaders64[5]={
888        { /* SHN_UNDEF */
889            0
890        },
891        { /* .symtab */
892            1, /* sh_name */
893            SHT_SYMTAB,
894            0, /* sh_flags */
895            0, /* sh_addr */
896            (Elf64_Off)(sizeof(header64)+sizeof(sectionHeaders64)), /* sh_offset */
897            (Elf64_Xword)(2*sizeof(Elf64_Sym)), /* sh_size */
898            3, /* sh_link=sect hdr index of .strtab */
899            1, /* sh_info=One greater than the symbol table index of the last
900                * local symbol (with STB_LOCAL). */
901            4, /* sh_addralign */
902            (Elf64_Xword)(sizeof(Elf64_Sym)) /* sh_entsize */
903        },
904        { /* .shstrtab */
905            9, /* sh_name */
906            SHT_STRTAB,
907            0, /* sh_flags */
908            0, /* sh_addr */
909            (Elf64_Off)(sizeof(header64)+sizeof(sectionHeaders64)+2*sizeof(Elf64_Sym)), /* sh_offset */
910            40, /* sh_size */
911            0, /* sh_link */
912            0, /* sh_info */
913            1, /* sh_addralign */
914            0 /* sh_entsize */
915        },
916        { /* .strtab */
917            19, /* sh_name */
918            SHT_STRTAB,
919            0, /* sh_flags */
920            0, /* sh_addr */
921            (Elf64_Off)(sizeof(header64)+sizeof(sectionHeaders64)+2*sizeof(Elf64_Sym)+40), /* sh_offset */
922            (Elf64_Xword)sizeof(entry), /* sh_size */
923            0, /* sh_link */
924            0, /* sh_info */
925            1, /* sh_addralign */
926            0 /* sh_entsize */
927        },
928        { /* .rodata */
929            27, /* sh_name */
930            SHT_PROGBITS,
931            SHF_ALLOC, /* sh_flags */
932            0, /* sh_addr */
933            (Elf64_Off)(sizeof(header64)+sizeof(sectionHeaders64)+2*sizeof(Elf64_Sym)+40+sizeof(entry)), /* sh_offset */
934            0, /* sh_size */
935            0, /* sh_link */
936            0, /* sh_info */
937            16, /* sh_addralign */
938            0 /* sh_entsize */
939        }
940    };
941
942    /*
943     * 64-bit symbol table
944     * careful: different order of items compared with Elf32_sym!
945     */
946    static Elf64_Sym symbols64[2]={
947        { /* STN_UNDEF */
948            0
949        },
950        { /* data entry point */
951            1, /* st_name */
952            ELF64_ST_INFO(STB_GLOBAL, STT_OBJECT),
953            0, /* st_other */
954            4, /* st_shndx=index of related section table entry */
955            0, /* st_value */
956            0 /* st_size */
957        }
958    };
959
960#endif /* U_ELF64 */
961
962    /* entry[] have a leading NUL */
963    entryOffset=1;
964
965    /* in the common code, count entryLength from after the NUL */
966    entryLengthOffset=1;
967
968    newSuffix=".o";
969
970#elif defined(U_WINDOWS)
971    struct {
972        IMAGE_FILE_HEADER fileHeader;
973        IMAGE_SECTION_HEADER sections[2];
974        char linkerOptions[100];
975    } objHeader;
976    IMAGE_SYMBOL symbols[1];
977    struct {
978        DWORD sizeofLongNames;
979        char longNames[100];
980    } symbolNames;
981
982    /*
983     * entry sometimes have a leading '_'
984     * overwritten if entryOffset==0 depending on the target platform
985     * see check for cpu below
986     */
987    entry[0]='_';
988
989    newSuffix=".obj";
990#else
991#   error "Unknown platform for CAN_GENERATE_OBJECTS."
992#endif
993
994    /* deal with options, files and the entry point name */
995    getArchitecture(&cpu, &bits, &makeBigEndian, optMatchArch);
996    printf("genccode: --match-arch cpu=%hu bits=%hu big-endian=%hu\n", cpu, bits, makeBigEndian);
997#ifdef U_WINDOWS
998    if(cpu==IMAGE_FILE_MACHINE_I386) {
999        entryOffset=1;
1000    }
1001#endif
1002
1003    in=T_FileStream_open(filename, "rb");
1004    if(in==NULL) {
1005        fprintf(stderr, "genccode: unable to open input file %s\n", filename);
1006        exit(U_FILE_ACCESS_ERROR);
1007    }
1008    size=T_FileStream_size(in);
1009
1010    getOutFilename(filename, destdir, buffer, entry+entryOffset, newSuffix, optFilename);
1011    if (outFilePath != NULL) {
1012        uprv_strcpy(outFilePath, buffer);
1013    }
1014
1015    if(optEntryPoint != NULL) {
1016        uprv_strcpy(entry+entryOffset, optEntryPoint);
1017        uprv_strcat(entry+entryOffset, "_dat");
1018    }
1019    /* turn dashes in the entry name into underscores */
1020    entryLength=(int32_t)uprv_strlen(entry+entryLengthOffset);
1021    for(i=0; i<entryLength; ++i) {
1022        if(entry[entryLengthOffset+i]=='-') {
1023            entry[entryLengthOffset+i]='_';
1024        }
1025    }
1026
1027    /* open the output file */
1028    out=T_FileStream_open(buffer, "wb");
1029    if(out==NULL) {
1030        fprintf(stderr, "genccode: unable to open output file %s\n", buffer);
1031        exit(U_FILE_ACCESS_ERROR);
1032    }
1033
1034#ifdef U_ELF
1035    if(bits==32) {
1036        header32.e_ident[EI_DATA]= makeBigEndian ? ELFDATA2MSB : ELFDATA2LSB;
1037        header32.e_machine=cpu;
1038
1039        /* 16-align .rodata in the .o file, just in case */
1040        paddingSize=sectionHeaders32[4].sh_offset & 0xf;
1041        if(paddingSize!=0) {
1042                paddingSize=0x10-paddingSize;
1043                sectionHeaders32[4].sh_offset+=paddingSize;
1044        }
1045
1046        sectionHeaders32[4].sh_size=(Elf32_Word)size;
1047
1048        symbols32[1].st_size=(Elf32_Word)size;
1049
1050        /* write .o headers */
1051        T_FileStream_write(out, &header32, (int32_t)sizeof(header32));
1052        T_FileStream_write(out, sectionHeaders32, (int32_t)sizeof(sectionHeaders32));
1053        T_FileStream_write(out, symbols32, (int32_t)sizeof(symbols32));
1054    } else /* bits==64 */ {
1055#ifdef U_ELF64
1056        header64.e_ident[EI_DATA]= makeBigEndian ? ELFDATA2MSB : ELFDATA2LSB;
1057        header64.e_machine=cpu;
1058
1059        /* 16-align .rodata in the .o file, just in case */
1060        paddingSize=sectionHeaders64[4].sh_offset & 0xf;
1061        if(paddingSize!=0) {
1062                paddingSize=0x10-paddingSize;
1063                sectionHeaders64[4].sh_offset+=paddingSize;
1064        }
1065
1066        sectionHeaders64[4].sh_size=(Elf64_Xword)size;
1067
1068        symbols64[1].st_size=(Elf64_Xword)size;
1069
1070        /* write .o headers */
1071        T_FileStream_write(out, &header64, (int32_t)sizeof(header64));
1072        T_FileStream_write(out, sectionHeaders64, (int32_t)sizeof(sectionHeaders64));
1073        T_FileStream_write(out, symbols64, (int32_t)sizeof(symbols64));
1074#endif
1075    }
1076
1077    T_FileStream_write(out, sectionStrings, (int32_t)sizeof(sectionStrings));
1078    T_FileStream_write(out, entry, (int32_t)sizeof(entry));
1079    if(paddingSize!=0) {
1080        T_FileStream_write(out, padding, paddingSize);
1081    }
1082#elif defined(U_WINDOWS)
1083    /* populate the .obj headers */
1084    uprv_memset(&objHeader, 0, sizeof(objHeader));
1085    uprv_memset(&symbols, 0, sizeof(symbols));
1086    uprv_memset(&symbolNames, 0, sizeof(symbolNames));
1087
1088    /* write the linker export directive */
1089    uprv_strcpy(objHeader.linkerOptions, "-export:");
1090    length=8;
1091    uprv_strcpy(objHeader.linkerOptions+length, entry);
1092    length+=entryLength;
1093    uprv_strcpy(objHeader.linkerOptions+length, ",data ");
1094    length+=6;
1095
1096    /* set the file header */
1097    objHeader.fileHeader.Machine=cpu;
1098    objHeader.fileHeader.NumberOfSections=2;
1099    objHeader.fileHeader.TimeDateStamp=(DWORD)time(NULL);
1100    objHeader.fileHeader.PointerToSymbolTable=IMAGE_SIZEOF_FILE_HEADER+2*IMAGE_SIZEOF_SECTION_HEADER+length+size; /* start of symbol table */
1101    objHeader.fileHeader.NumberOfSymbols=1;
1102
1103    /* set the section for the linker options */
1104    uprv_strncpy((char *)objHeader.sections[0].Name, ".drectve", 8);
1105    objHeader.sections[0].SizeOfRawData=length;
1106    objHeader.sections[0].PointerToRawData=IMAGE_SIZEOF_FILE_HEADER+2*IMAGE_SIZEOF_SECTION_HEADER;
1107    objHeader.sections[0].Characteristics=IMAGE_SCN_LNK_INFO|IMAGE_SCN_LNK_REMOVE|IMAGE_SCN_ALIGN_1BYTES;
1108
1109    /* set the data section */
1110    uprv_strncpy((char *)objHeader.sections[1].Name, ".rdata", 6);
1111    objHeader.sections[1].SizeOfRawData=size;
1112    objHeader.sections[1].PointerToRawData=IMAGE_SIZEOF_FILE_HEADER+2*IMAGE_SIZEOF_SECTION_HEADER+length;
1113    objHeader.sections[1].Characteristics=IMAGE_SCN_CNT_INITIALIZED_DATA|IMAGE_SCN_ALIGN_16BYTES|IMAGE_SCN_MEM_READ;
1114
1115    /* set the symbol table */
1116    if(entryLength<=8) {
1117        uprv_strncpy((char *)symbols[0].N.ShortName, entry, entryLength);
1118        symbolNames.sizeofLongNames=4;
1119    } else {
1120        symbols[0].N.Name.Short=0;
1121        symbols[0].N.Name.Long=4;
1122        symbolNames.sizeofLongNames=4+entryLength+1;
1123        uprv_strcpy(symbolNames.longNames, entry);
1124    }
1125    symbols[0].SectionNumber=2;
1126    symbols[0].StorageClass=IMAGE_SYM_CLASS_EXTERNAL;
1127
1128    /* write the file header and the linker options section */
1129    T_FileStream_write(out, &objHeader, objHeader.sections[1].PointerToRawData);
1130#else
1131#   error "Unknown platform for CAN_GENERATE_OBJECTS."
1132#endif
1133
1134    /* copy the data file into section 2 */
1135    for(;;) {
1136        length=T_FileStream_read(in, buffer, sizeof(buffer));
1137        if(length==0) {
1138            break;
1139        }
1140        T_FileStream_write(out, buffer, (int32_t)length);
1141    }
1142
1143#ifdef U_WINDOWS
1144    /* write the symbol table */
1145    T_FileStream_write(out, symbols, IMAGE_SIZEOF_SYMBOL);
1146    T_FileStream_write(out, &symbolNames, symbolNames.sizeofLongNames);
1147#endif
1148
1149    if(T_FileStream_error(in)) {
1150        fprintf(stderr, "genccode: file read error while generating from file %s\n", filename);
1151        exit(U_FILE_ACCESS_ERROR);
1152    }
1153
1154    if(T_FileStream_error(out)) {
1155        fprintf(stderr, "genccode: file write error while generating from file %s\n", filename);
1156        exit(U_FILE_ACCESS_ERROR);
1157    }
1158
1159    T_FileStream_close(out);
1160    T_FileStream_close(in);
1161}
1162#endif
1163