1/******************************************************************************
2 *   Copyright (C) 2009-2015, International Business Machines
3 *   Corporation and others.  All Rights Reserved.
4 *******************************************************************************
5 */
6#include "unicode/utypes.h"
7
8#if U_PLATFORM_HAS_WIN32_API
9#   define VC_EXTRALEAN
10#   define WIN32_LEAN_AND_MEAN
11#   define NOUSER
12#   define NOSERVICE
13#   define NOIME
14#   define NOMCX
15#include <windows.h>
16#include <time.h>
17#   ifdef __GNUC__
18#       define WINDOWS_WITH_GNUC
19#   endif
20#endif
21
22#if U_PLATFORM_IS_LINUX_BASED && U_HAVE_ELF_H
23#   define U_ELF
24#endif
25
26#ifdef U_ELF
27#   include <elf.h>
28#   if defined(ELFCLASS64)
29#       define U_ELF64
30#   endif
31    /* Old elf.h headers may not have EM_X86_64, or have EM_X8664 instead. */
32#   ifndef EM_X86_64
33#       define EM_X86_64 62
34#   endif
35#   define ICU_ENTRY_OFFSET 0
36#endif
37
38#include <stdio.h>
39#include <stdlib.h>
40#include "unicode/putil.h"
41#include "cmemory.h"
42#include "cstring.h"
43#include "filestrm.h"
44#include "toolutil.h"
45#include "unicode/uclean.h"
46#include "uoptions.h"
47#include "pkg_genc.h"
48
/*
 * Sentinel column value: write8(), write32() and write8str() treat it as
 * "the next value is the first one written", so no separator precedes it.
 */
#define MAX_COLUMN ((uint32_t)(0xFFFFFFFFU))

/* Hexadecimal literal syntaxes; one is chosen per assembler via AssemblyType.hexType. */
#define HEX_0X 0 /*  0x1234 */
#define HEX_0H 1 /*  01234h */
53
54/* prototypes --------------------------------------------------------------- */
/*
 * Builds the output path in outFilename and the symbol name in entryName
 * from the input file name, the optional destination directory, the new
 * suffix and the optional output base name.
 */
static void
getOutFilename(const char *inFilename, const char *destdir, char *outFilename, char *entryName, const char *newSuffix, const char *optFilename);

/* Writes one byte as a decimal number; returns the updated column counter. */
static uint32_t
write8(FileStream *out, uint8_t byte, uint32_t column);

/*
 * Writes one 32-bit value for the selected assembler; returns the updated
 * column counter. Despite its name here, "byte" is the whole 32-bit value
 * (the definition calls this parameter bitField).
 */
static uint32_t
write32(FileStream *out, uint32_t byte, uint32_t column);

#if U_PLATFORM == U_PF_OS400
/* Writes one byte as an escape sequence inside a C string literal. */
static uint32_t
write8str(FileStream *out, uint8_t byte, uint32_t column);
#endif
68/* -------------------------------------------------------------------------- */
69
70/*
71Creating Template Files for New Platforms
72
73Let the cc compiler help you get started.
74Compile this program
75    const unsigned int x[5] = {1, 2, 0xdeadbeef, 0xffffffff, 16};
76with the -S option to produce assembly output.
77
78For example, this will generate array.s:
79gcc -S array.c
80
81This will produce a .s file that may look like this:
82
83    .file   "array.c"
84    .version        "01.01"
85gcc2_compiled.:
86    .globl x
87    .section        .rodata
88    .align 4
89    .type    x,@object
90    .size    x,20
91x:
92    .long   1
93    .long   2
94    .long   -559038737
95    .long   -1
96    .long   16
97    .ident  "GCC: (GNU) 2.96 20000731 (Red Hat Linux 7.1 2.96-85)"
98
99which gives a starting point that will compile, and can be transformed
100to become the template, generally with some consulting of as docs and
101some experimentation.
102
103If you want ICU to automatically use this assembly, you should
104specify "GENCCODE_ASSEMBLY=-a name" in the specific config/mh-* file,
105where the name is the compiler or platform that you used in this
106assemblyHeader data structure.
107*/
/*
 * Table of assembly templates, one entry per supported assembler.
 * writeAssemblyCode() formats header and footer with sprintf(), passing the
 * entry point name for every %s, and write32() starts each data line with
 * beginLine.
 */
static const struct AssemblyType {
    const char *name;      /* value matched by checkAssemblyHeaderName() */
    const char *header;    /* printf format; every %s receives the entry point name */
    const char *beginLine; /* data directive that starts each line of values */
    const char *footer;    /* printf format written after the data; may be empty */
    int8_t      hexType; /* HEX_0X or HEX_0H */
} assemblyHeader[] = {
    /* For gcc assemblers, the meaning of .align changes depending on the */
    /* hardware, so we use .balign 16 which always means 16 bytes. */
    /* https://sourceware.org/binutils/docs/as/Pseudo-Ops.html */
    {"gcc",
        ".globl %s\n"
        "\t.section .note.GNU-stack,\"\",%%progbits\n"
        "\t.section .rodata\n"
        "\t.balign 16\n"
        "#ifdef U_HIDE_DATA_SYMBOL\n"
        "\t.hidden %s\n"
        "#endif\n"
        "\t.type %s,%%object\n"
        "%s:\n\n",

        ".long ","",HEX_0X
    },
    {"gcc-darwin",
        /*"\t.section __TEXT,__text,regular,pure_instructions\n"
        "\t.section __TEXT,__picsymbolstub1,symbol_stubs,pure_instructions,32\n"*/
        ".globl _%s\n"
        "#ifdef U_HIDE_DATA_SYMBOL\n"
        "\t.private_extern _%s\n"
        "#endif\n"
        "\t.data\n"
        "\t.const\n"
        "\t.balign 16\n"
        "_%s:\n\n",

        ".long ","",HEX_0X
    },
    {"gcc-cygwin",
        ".globl _%s\n"
        "\t.section .rodata\n"
        "\t.balign 16\n"
        "_%s:\n\n",

        ".long ","",HEX_0X
    },
    {"gcc-mingw64",
        ".globl %s\n"
        "\t.section .rodata\n"
        "\t.balign 16\n"
        "%s:\n\n",

        ".long ","",HEX_0X
    },
/* 16 bytes alignment. */
/* http://docs.oracle.com/cd/E19641-01/802-1947/802-1947.pdf */
    {"sun",
        "\t.section \".rodata\"\n"
        "\t.align   16\n"
        ".globl     %s\n"
        "%s:\n",

        ".word ","",HEX_0X
    },
/* 16 bytes alignment for sun-x86. */
/* http://docs.oracle.com/cd/E19963-01/html/821-1608/eoiyg.html */
    {"sun-x86",
        "Drodata.rodata:\n"
        "\t.type   Drodata.rodata,@object\n"
        "\t.size   Drodata.rodata,0\n"
        "\t.globl  %s\n"
        "\t.align  16\n"
        "%s:\n",

        ".4byte ","",HEX_0X
    },
/* 1<<4 byte (16-byte) alignment for aix. */
/* http://pic.dhe.ibm.com/infocenter/aix/v6r1/index.jsp?topic=%2Fcom.ibm.aix.aixassem%2Fdoc%2Falangref%2Fidalangref_csect_pseudoop.htm */
    {"xlc",
        ".globl %s{RO}\n"
        "\t.toc\n"
        "%s:\n"
        "\t.csect %s{RO}, 4\n",

        ".long ","",HEX_0X
    },
    {"aCC-ia64",
        "\t.file   \"%s.s\"\n"
        "\t.type   %s,@object\n"
        "\t.global %s\n"
        "\t.secalias .abe$0.rodata, \".rodata\"\n"
        "\t.section .abe$0.rodata = \"a\", \"progbits\"\n"
        "\t.align  16\n"
        "%s::\t",

        "data4 ","",HEX_0X
    },
    {"aCC-parisc",
        "\t.SPACE  $TEXT$\n"
        "\t.SUBSPA $LIT$\n"
        "%s\n"
        "\t.EXPORT %s\n"
        "\t.ALIGN  16\n",

        ".WORD ","",HEX_0X
    },
/* align 16 bytes */
/*  http://msdn.microsoft.com/en-us/library/dwa9fwef.aspx */
    /* The only entry with a non-empty footer and the 01234h hex syntax. */
    { "masm",
      "\tTITLE %s\n"
      "; generated by genccode\n"
      ".386\n"
      ".model flat\n"
      "\tPUBLIC _%s\n"
      "ICUDATA_%s\tSEGMENT READONLY PARA PUBLIC FLAT 'DATA'\n"
      "\tALIGN 16\n"
      "_%s\tLABEL DWORD\n",
      "\tDWORD ","\nICUDATA_%s\tENDS\n\tEND\n",HEX_0H
    }
};
227
/* Index into assemblyHeader[] set by checkAssemblyHeaderName(); -1 while no entry is selected. */
static int32_t assemblyHeaderIndex = -1;
/* Hex literal syntax (HEX_0X or HEX_0H) of the selected entry; read by write32(). */
static int32_t hexType = HEX_0X;
230
231U_CAPI UBool U_EXPORT2
232checkAssemblyHeaderName(const char* optAssembly) {
233    int32_t idx;
234    assemblyHeaderIndex = -1;
235    for (idx = 0; idx < (int32_t)(sizeof(assemblyHeader)/sizeof(assemblyHeader[0])); idx++) {
236        if (uprv_strcmp(optAssembly, assemblyHeader[idx].name) == 0) {
237            assemblyHeaderIndex = idx;
238            hexType = assemblyHeader[idx].hexType; /* set the hex type */
239            return TRUE;
240        }
241    }
242
243    return FALSE;
244}
245
246
247U_CAPI void U_EXPORT2
248printAssemblyHeadersToStdErr(void) {
249    int32_t idx;
250    fprintf(stderr, "%s", assemblyHeader[0].name);
251    for (idx = 1; idx < (int32_t)(sizeof(assemblyHeader)/sizeof(assemblyHeader[0])); idx++) {
252        fprintf(stderr, ", %s", assemblyHeader[idx].name);
253    }
254    fprintf(stderr,
255        ")\n");
256}
257
258U_CAPI void U_EXPORT2
259writeAssemblyCode(const char *filename, const char *destdir, const char *optEntryPoint, const char *optFilename, char *outFilePath) {
260    uint32_t column = MAX_COLUMN;
261    char entry[64];
262    uint32_t buffer[1024];
263    char *bufferStr = (char *)buffer;
264    FileStream *in, *out;
265    size_t i, length;
266
267    in=T_FileStream_open(filename, "rb");
268    if(in==NULL) {
269        fprintf(stderr, "genccode: unable to open input file %s\n", filename);
270        exit(U_FILE_ACCESS_ERROR);
271    }
272
273    getOutFilename(filename, destdir, bufferStr, entry, ".S", optFilename);
274    out=T_FileStream_open(bufferStr, "w");
275    if(out==NULL) {
276        fprintf(stderr, "genccode: unable to open output file %s\n", bufferStr);
277        exit(U_FILE_ACCESS_ERROR);
278    }
279
280    if (outFilePath != NULL) {
281        uprv_strcpy(outFilePath, bufferStr);
282    }
283
284#ifdef WINDOWS_WITH_GNUC
285    /* Need to fix the file seperator character when using MinGW. */
286    swapFileSepChar(outFilePath, U_FILE_SEP_CHAR, '/');
287#endif
288
289    if(optEntryPoint != NULL) {
290        uprv_strcpy(entry, optEntryPoint);
291        uprv_strcat(entry, "_dat");
292    }
293
294    /* turn dashes or dots in the entry name into underscores */
295    length=uprv_strlen(entry);
296    for(i=0; i<length; ++i) {
297        if(entry[i]=='-' || entry[i]=='.') {
298            entry[i]='_';
299        }
300    }
301
302    sprintf(bufferStr, assemblyHeader[assemblyHeaderIndex].header,
303        entry, entry, entry, entry,
304        entry, entry, entry, entry);
305    T_FileStream_writeLine(out, bufferStr);
306    T_FileStream_writeLine(out, assemblyHeader[assemblyHeaderIndex].beginLine);
307
308    for(;;) {
309        length=T_FileStream_read(in, buffer, sizeof(buffer));
310        if(length==0) {
311            break;
312        }
313        if (length != sizeof(buffer)) {
314            /* pad with extra 0's when at the end of the file */
315            for(i=0; i < (length % sizeof(uint32_t)); ++i) {
316                buffer[length+i] = 0;
317            }
318        }
319        for(i=0; i<(length/sizeof(buffer[0])); i++) {
320            column = write32(out, buffer[i], column);
321        }
322    }
323
324    T_FileStream_writeLine(out, "\n");
325
326    sprintf(bufferStr, assemblyHeader[assemblyHeaderIndex].footer,
327        entry, entry, entry, entry,
328        entry, entry, entry, entry);
329    T_FileStream_writeLine(out, bufferStr);
330
331    if(T_FileStream_error(in)) {
332        fprintf(stderr, "genccode: file read error while generating from file %s\n", filename);
333        exit(U_FILE_ACCESS_ERROR);
334    }
335
336    if(T_FileStream_error(out)) {
337        fprintf(stderr, "genccode: file write error while generating from file %s\n", filename);
338        exit(U_FILE_ACCESS_ERROR);
339    }
340
341    T_FileStream_close(out);
342    T_FileStream_close(in);
343}
344
345U_CAPI void U_EXPORT2
346writeCCode(const char *filename, const char *destdir, const char *optName, const char *optFilename, char *outFilePath) {
347    uint32_t column = MAX_COLUMN;
348    char buffer[4096], entry[64];
349    FileStream *in, *out;
350    size_t i, length;
351
352    in=T_FileStream_open(filename, "rb");
353    if(in==NULL) {
354        fprintf(stderr, "genccode: unable to open input file %s\n", filename);
355        exit(U_FILE_ACCESS_ERROR);
356    }
357
358    if(optName != NULL) { /* prepend  'icudt28_' */
359      strcpy(entry, optName);
360      strcat(entry, "_");
361    } else {
362      entry[0] = 0;
363    }
364
365    getOutFilename(filename, destdir, buffer, entry+uprv_strlen(entry), ".c", optFilename);
366    if (outFilePath != NULL) {
367        uprv_strcpy(outFilePath, buffer);
368    }
369    out=T_FileStream_open(buffer, "w");
370    if(out==NULL) {
371        fprintf(stderr, "genccode: unable to open output file %s\n", buffer);
372        exit(U_FILE_ACCESS_ERROR);
373    }
374
375    /* turn dashes or dots in the entry name into underscores */
376    length=uprv_strlen(entry);
377    for(i=0; i<length; ++i) {
378        if(entry[i]=='-' || entry[i]=='.') {
379            entry[i]='_';
380        }
381    }
382
383#if U_PLATFORM == U_PF_OS400
384    /*
385    TODO: Fix this once the compiler implements this feature. Keep in sync with udatamem.c
386
387    This is here because this platform can't currently put
388    const data into the read-only pages of an object or
389    shared library (service program). Only strings are allowed in read-only
390    pages, so we use char * strings to store the data.
391
392    In order to prevent the beginning of the data from ever matching the
393    magic numbers we must still use the initial double.
394    [grhoten 4/24/2003]
395    */
396    sprintf(buffer,
397        "#ifndef IN_GENERATED_CCODE\n"
398        "#define IN_GENERATED_CCODE\n"
399        "#define U_DISABLE_RENAMING 1\n"
400        "#include \"unicode/umachine.h\"\n"
401        "#endif\n"
402        "U_CDECL_BEGIN\n"
403        "const struct {\n"
404        "    double bogus;\n"
405        "    const char *bytes; \n"
406        "} %s={ 0.0, \n",
407        entry);
408    T_FileStream_writeLine(out, buffer);
409
410    for(;;) {
411        length=T_FileStream_read(in, buffer, sizeof(buffer));
412        if(length==0) {
413            break;
414        }
415        for(i=0; i<length; ++i) {
416            column = write8str(out, (uint8_t)buffer[i], column);
417        }
418    }
419
420    T_FileStream_writeLine(out, "\"\n};\nU_CDECL_END\n");
421#else
422    /* Function renaming shouldn't be done in data */
423    sprintf(buffer,
424        "#ifndef IN_GENERATED_CCODE\n"
425        "#define IN_GENERATED_CCODE\n"
426        "#define U_DISABLE_RENAMING 1\n"
427        "#include \"unicode/umachine.h\"\n"
428        "#endif\n"
429        "U_CDECL_BEGIN\n"
430        "const struct {\n"
431        "    double bogus;\n"
432        "    uint8_t bytes[%ld]; \n"
433        "} %s={ 0.0, {\n",
434        (long)T_FileStream_size(in), entry);
435    T_FileStream_writeLine(out, buffer);
436
437    for(;;) {
438        length=T_FileStream_read(in, buffer, sizeof(buffer));
439        if(length==0) {
440            break;
441        }
442        for(i=0; i<length; ++i) {
443            column = write8(out, (uint8_t)buffer[i], column);
444        }
445    }
446
447    T_FileStream_writeLine(out, "\n}\n};\nU_CDECL_END\n");
448#endif
449
450    if(T_FileStream_error(in)) {
451        fprintf(stderr, "genccode: file read error while generating from file %s\n", filename);
452        exit(U_FILE_ACCESS_ERROR);
453    }
454
455    if(T_FileStream_error(out)) {
456        fprintf(stderr, "genccode: file write error while generating from file %s\n", filename);
457        exit(U_FILE_ACCESS_ERROR);
458    }
459
460    T_FileStream_close(out);
461    T_FileStream_close(in);
462}
463
464static uint32_t
465write32(FileStream *out, uint32_t bitField, uint32_t column) {
466    int32_t i;
467    char bitFieldStr[64]; /* This is more bits than needed for a 32-bit number */
468    char *s = bitFieldStr;
469    uint8_t *ptrIdx = (uint8_t *)&bitField;
470    static const char hexToStr[16] = {
471        '0','1','2','3',
472        '4','5','6','7',
473        '8','9','A','B',
474        'C','D','E','F'
475    };
476
477    /* write the value, possibly with comma and newline */
478    if(column==MAX_COLUMN) {
479        /* first byte */
480        column=1;
481    } else if(column<32) {
482        *(s++)=',';
483        ++column;
484    } else {
485        *(s++)='\n';
486        uprv_strcpy(s, assemblyHeader[assemblyHeaderIndex].beginLine);
487        s+=uprv_strlen(s);
488        column=1;
489    }
490
491    if (bitField < 10) {
492        /* It's a small number. Don't waste the space for 0x */
493        *(s++)=hexToStr[bitField];
494    }
495    else {
496        int seenNonZero = 0; /* This is used to remove leading zeros */
497
498        if(hexType==HEX_0X) {
499         *(s++)='0';
500         *(s++)='x';
501        } else if(hexType==HEX_0H) {
502         *(s++)='0';
503        }
504
505        /* This creates a 32-bit field */
506#if U_IS_BIG_ENDIAN
507        for (i = 0; i < sizeof(uint32_t); i++)
508#else
509        for (i = sizeof(uint32_t)-1; i >= 0 ; i--)
510#endif
511        {
512            uint8_t value = ptrIdx[i];
513            if (value || seenNonZero) {
514                *(s++)=hexToStr[value>>4];
515                *(s++)=hexToStr[value&0xF];
516                seenNonZero = 1;
517            }
518        }
519        if(hexType==HEX_0H) {
520         *(s++)='h';
521        }
522    }
523
524    *(s++)=0;
525    T_FileStream_writeLine(out, bitFieldStr);
526    return column;
527}
528
529static uint32_t
530write8(FileStream *out, uint8_t byte, uint32_t column) {
531    char s[4];
532    int i=0;
533
534    /* convert the byte value to a string */
535    if(byte>=100) {
536        s[i++]=(char)('0'+byte/100);
537        byte%=100;
538    }
539    if(i>0 || byte>=10) {
540        s[i++]=(char)('0'+byte/10);
541        byte%=10;
542    }
543    s[i++]=(char)('0'+byte);
544    s[i]=0;
545
546    /* write the value, possibly with comma and newline */
547    if(column==MAX_COLUMN) {
548        /* first byte */
549        column=1;
550    } else if(column<16) {
551        T_FileStream_writeLine(out, ",");
552        ++column;
553    } else {
554        T_FileStream_writeLine(out, ",\n");
555        column=1;
556    }
557    T_FileStream_writeLine(out, s);
558    return column;
559}
560
561#if U_PLATFORM == U_PF_OS400
562static uint32_t
563write8str(FileStream *out, uint8_t byte, uint32_t column) {
564    char s[8];
565
566    if (byte > 7)
567        sprintf(s, "\\x%X", byte);
568    else
569        sprintf(s, "\\%X", byte);
570
571    /* write the value, possibly with comma and newline */
572    if(column==MAX_COLUMN) {
573        /* first byte */
574        column=1;
575        T_FileStream_writeLine(out, "\"");
576    } else if(column<24) {
577        ++column;
578    } else {
579        T_FileStream_writeLine(out, "\"\n\"");
580        column=1;
581    }
582    T_FileStream_writeLine(out, s);
583    return column;
584}
585#endif
586
587static void
588getOutFilename(const char *inFilename, const char *destdir, char *outFilename, char *entryName, const char *newSuffix, const char *optFilename) {
589    const char *basename=findBasename(inFilename), *suffix=uprv_strrchr(basename, '.');
590
591    /* copy path */
592    if(destdir!=NULL && *destdir!=0) {
593        do {
594            *outFilename++=*destdir++;
595        } while(*destdir!=0);
596        if(*(outFilename-1)!=U_FILE_SEP_CHAR) {
597            *outFilename++=U_FILE_SEP_CHAR;
598        }
599        inFilename=basename;
600    } else {
601        while(inFilename<basename) {
602            *outFilename++=*inFilename++;
603        }
604    }
605
606    if(suffix==NULL) {
607        /* the filename does not have a suffix */
608        uprv_strcpy(entryName, inFilename);
609        if(optFilename != NULL) {
610          uprv_strcpy(outFilename, optFilename);
611        } else {
612          uprv_strcpy(outFilename, inFilename);
613        }
614        uprv_strcat(outFilename, newSuffix);
615    } else {
616        char *saveOutFilename = outFilename;
617        /* copy basename */
618        while(inFilename<suffix) {
619            if(*inFilename=='-') {
620                /* iSeries cannot have '-' in the .o objects. */
621                *outFilename++=*entryName++='_';
622                inFilename++;
623            }
624            else {
625                *outFilename++=*entryName++=*inFilename++;
626            }
627        }
628
629        /* replace '.' by '_' */
630        *outFilename++=*entryName++='_';
631        ++inFilename;
632
633        /* copy suffix */
634        while(*inFilename!=0) {
635            *outFilename++=*entryName++=*inFilename++;
636        }
637
638        *entryName=0;
639
640        if(optFilename != NULL) {
641            uprv_strcpy(saveOutFilename, optFilename);
642            uprv_strcat(saveOutFilename, newSuffix);
643        } else {
644            /* add ".c" */
645            uprv_strcpy(outFilename, newSuffix);
646        }
647    }
648}
649
650#ifdef CAN_GENERATE_OBJECTS
651static void
652getArchitecture(uint16_t *pCPU, uint16_t *pBits, UBool *pIsBigEndian, const char *optMatchArch) {
653    union {
654        char        bytes[2048];
655#ifdef U_ELF
656        Elf32_Ehdr  header32;
657        /* Elf32_Ehdr and ELF64_Ehdr are identical for the necessary fields. */
658#elif U_PLATFORM_HAS_WIN32_API
659        IMAGE_FILE_HEADER header;
660#endif
661    } buffer;
662
663    const char *filename;
664    FileStream *in;
665    int32_t length;
666
667#ifdef U_ELF
668
669#elif U_PLATFORM_HAS_WIN32_API
670    const IMAGE_FILE_HEADER *pHeader;
671#else
672#   error "Unknown platform for CAN_GENERATE_OBJECTS."
673#endif
674
675    if(optMatchArch != NULL) {
676        filename=optMatchArch;
677    } else {
678        /* set defaults */
679#ifdef U_ELF
680        /* set EM_386 because elf.h does not provide better defaults */
681        *pCPU=EM_386;
682        *pBits=32;
683        *pIsBigEndian=(UBool)(U_IS_BIG_ENDIAN ? ELFDATA2MSB : ELFDATA2LSB);
684#elif U_PLATFORM_HAS_WIN32_API
685/* _M_IA64 should be defined in windows.h */
686#   if defined(_M_IA64)
687        *pCPU=IMAGE_FILE_MACHINE_IA64;
688#   elif defined(_M_AMD64)
689        *pCPU=IMAGE_FILE_MACHINE_AMD64;
690#   else
691        *pCPU=IMAGE_FILE_MACHINE_I386;
692#   endif
693        *pBits= *pCPU==IMAGE_FILE_MACHINE_I386 ? 32 : 64;
694        *pIsBigEndian=FALSE;
695#else
696#   error "Unknown platform for CAN_GENERATE_OBJECTS."
697#endif
698        return;
699    }
700
701    in=T_FileStream_open(filename, "rb");
702    if(in==NULL) {
703        fprintf(stderr, "genccode: unable to open match-arch file %s\n", filename);
704        exit(U_FILE_ACCESS_ERROR);
705    }
706    length=T_FileStream_read(in, buffer.bytes, sizeof(buffer.bytes));
707
708#ifdef U_ELF
709    if(length<sizeof(Elf32_Ehdr)) {
710        fprintf(stderr, "genccode: match-arch file %s is too short\n", filename);
711        exit(U_UNSUPPORTED_ERROR);
712    }
713    if(
714        buffer.header32.e_ident[0]!=ELFMAG0 ||
715        buffer.header32.e_ident[1]!=ELFMAG1 ||
716        buffer.header32.e_ident[2]!=ELFMAG2 ||
717        buffer.header32.e_ident[3]!=ELFMAG3 ||
718        buffer.header32.e_ident[EI_CLASS]<ELFCLASS32 || buffer.header32.e_ident[EI_CLASS]>ELFCLASS64
719    ) {
720        fprintf(stderr, "genccode: match-arch file %s is not an ELF object file, or not supported\n", filename);
721        exit(U_UNSUPPORTED_ERROR);
722    }
723
724    *pBits= buffer.header32.e_ident[EI_CLASS]==ELFCLASS32 ? 32 : 64; /* only 32 or 64: see check above */
725#ifdef U_ELF64
726    if(*pBits!=32 && *pBits!=64) {
727        fprintf(stderr, "genccode: currently only supports 32-bit and 64-bit ELF format\n");
728        exit(U_UNSUPPORTED_ERROR);
729    }
730#else
731    if(*pBits!=32) {
732        fprintf(stderr, "genccode: built with elf.h missing 64-bit definitions\n");
733        exit(U_UNSUPPORTED_ERROR);
734    }
735#endif
736
737    *pIsBigEndian=(UBool)(buffer.header32.e_ident[EI_DATA]==ELFDATA2MSB);
738    if(*pIsBigEndian!=U_IS_BIG_ENDIAN) {
739        fprintf(stderr, "genccode: currently only same-endianness ELF formats are supported\n");
740        exit(U_UNSUPPORTED_ERROR);
741    }
742    /* TODO: Support byte swapping */
743
744    *pCPU=buffer.header32.e_machine;
745#elif U_PLATFORM_HAS_WIN32_API
746    if(length<sizeof(IMAGE_FILE_HEADER)) {
747        fprintf(stderr, "genccode: match-arch file %s is too short\n", filename);
748        exit(U_UNSUPPORTED_ERROR);
749    }
750    /* TODO: Use buffer.header.  Keep aliasing legal.  */
751    pHeader=(const IMAGE_FILE_HEADER *)buffer.bytes;
752    *pCPU=pHeader->Machine;
753    /*
754     * The number of bits is implicit with the Machine value.
755     * *pBits is ignored in the calling code, so this need not be precise.
756     */
757    *pBits= *pCPU==IMAGE_FILE_MACHINE_I386 ? 32 : 64;
758    /* Windows always runs on little-endian CPUs. */
759    *pIsBigEndian=FALSE;
760#else
761#   error "Unknown platform for CAN_GENERATE_OBJECTS."
762#endif
763
764    T_FileStream_close(in);
765}
766
767U_CAPI void U_EXPORT2
768writeObjectCode(const char *filename, const char *destdir, const char *optEntryPoint, const char *optMatchArch, const char *optFilename, char *outFilePath) {
769    /* common variables */
770    char buffer[4096], entry[96]={ 0 };
771    FileStream *in, *out;
772    const char *newSuffix;
773    int32_t i, entryLength, length, size, entryOffset=0, entryLengthOffset=0;
774
775    uint16_t cpu, bits;
776    UBool makeBigEndian;
777
778    /* platform-specific variables and initialization code */
779#ifdef U_ELF
780    /* 32-bit Elf file header */
781    static Elf32_Ehdr header32={
782        {
783            /* e_ident[] */
784            ELFMAG0, ELFMAG1, ELFMAG2, ELFMAG3,
785            ELFCLASS32,
786            U_IS_BIG_ENDIAN ? ELFDATA2MSB : ELFDATA2LSB,
787            EV_CURRENT /* EI_VERSION */
788        },
789        ET_REL,
790        EM_386,
791        EV_CURRENT, /* e_version */
792        0, /* e_entry */
793        0, /* e_phoff */
794        (Elf32_Off)sizeof(Elf32_Ehdr), /* e_shoff */
795        0, /* e_flags */
796        (Elf32_Half)sizeof(Elf32_Ehdr), /* eh_size */
797        0, /* e_phentsize */
798        0, /* e_phnum */
799        (Elf32_Half)sizeof(Elf32_Shdr), /* e_shentsize */
800        5, /* e_shnum */
801        2 /* e_shstrndx */
802    };
803
804    /* 32-bit Elf section header table */
805    static Elf32_Shdr sectionHeaders32[5]={
806        { /* SHN_UNDEF */
807            0
808        },
809        { /* .symtab */
810            1, /* sh_name */
811            SHT_SYMTAB,
812            0, /* sh_flags */
813            0, /* sh_addr */
814            (Elf32_Off)(sizeof(header32)+sizeof(sectionHeaders32)), /* sh_offset */
815            (Elf32_Word)(2*sizeof(Elf32_Sym)), /* sh_size */
816            3, /* sh_link=sect hdr index of .strtab */
817            1, /* sh_info=One greater than the symbol table index of the last
818                * local symbol (with STB_LOCAL). */
819            4, /* sh_addralign */
820            (Elf32_Word)(sizeof(Elf32_Sym)) /* sh_entsize */
821        },
822        { /* .shstrtab */
823            9, /* sh_name */
824            SHT_STRTAB,
825            0, /* sh_flags */
826            0, /* sh_addr */
827            (Elf32_Off)(sizeof(header32)+sizeof(sectionHeaders32)+2*sizeof(Elf32_Sym)), /* sh_offset */
828            40, /* sh_size */
829            0, /* sh_link */
830            0, /* sh_info */
831            1, /* sh_addralign */
832            0 /* sh_entsize */
833        },
834        { /* .strtab */
835            19, /* sh_name */
836            SHT_STRTAB,
837            0, /* sh_flags */
838            0, /* sh_addr */
839            (Elf32_Off)(sizeof(header32)+sizeof(sectionHeaders32)+2*sizeof(Elf32_Sym)+40), /* sh_offset */
840            (Elf32_Word)sizeof(entry), /* sh_size */
841            0, /* sh_link */
842            0, /* sh_info */
843            1, /* sh_addralign */
844            0 /* sh_entsize */
845        },
846        { /* .rodata */
847            27, /* sh_name */
848            SHT_PROGBITS,
849            SHF_ALLOC, /* sh_flags */
850            0, /* sh_addr */
851            (Elf32_Off)(sizeof(header32)+sizeof(sectionHeaders32)+2*sizeof(Elf32_Sym)+40+sizeof(entry)), /* sh_offset */
852            0, /* sh_size */
853            0, /* sh_link */
854            0, /* sh_info */
855            16, /* sh_addralign */
856            0 /* sh_entsize */
857        }
858    };
859
860    /* symbol table */
861    static Elf32_Sym symbols32[2]={
862        { /* STN_UNDEF */
863            0
864        },
865        { /* data entry point */
866            1, /* st_name */
867            0, /* st_value */
868            0, /* st_size */
869            ELF64_ST_INFO(STB_GLOBAL, STT_OBJECT),
870            0, /* st_other */
871            4 /* st_shndx=index of related section table entry */
872        }
873    };
874
875    /* section header string table, with decimal string offsets */
876    static const char sectionStrings[40]=
877        /*  0 */ "\0"
878        /*  1 */ ".symtab\0"
879        /*  9 */ ".shstrtab\0"
880        /* 19 */ ".strtab\0"
881        /* 27 */ ".rodata\0"
882        /* 35 */ "\0\0\0\0"; /* contains terminating NUL */
883        /* 40: padded to multiple of 8 bytes */
884
885    /*
886     * Use entry[] for the string table which will contain only the
887     * entry point name.
888     * entry[0] must be 0 (NUL)
889     * The entry point name can be up to 38 characters long (sizeof(entry)-2).
890     */
891
892    /* 16-align .rodata in the .o file, just in case */
893    static const char padding[16]={ 0 };
894    int32_t paddingSize;
895
896#ifdef U_ELF64
897    /* 64-bit Elf file header */
898    static Elf64_Ehdr header64={
899        {
900            /* e_ident[] */
901            ELFMAG0, ELFMAG1, ELFMAG2, ELFMAG3,
902            ELFCLASS64,
903            U_IS_BIG_ENDIAN ? ELFDATA2MSB : ELFDATA2LSB,
904            EV_CURRENT /* EI_VERSION */
905        },
906        ET_REL,
907        EM_X86_64,
908        EV_CURRENT, /* e_version */
909        0, /* e_entry */
910        0, /* e_phoff */
911        (Elf64_Off)sizeof(Elf64_Ehdr), /* e_shoff */
912        0, /* e_flags */
913        (Elf64_Half)sizeof(Elf64_Ehdr), /* eh_size */
914        0, /* e_phentsize */
915        0, /* e_phnum */
916        (Elf64_Half)sizeof(Elf64_Shdr), /* e_shentsize */
917        5, /* e_shnum */
918        2 /* e_shstrndx */
919    };
920
921    /* 64-bit Elf section header table */
922    static Elf64_Shdr sectionHeaders64[5]={
923        { /* SHN_UNDEF */
924            0
925        },
926        { /* .symtab */
927            1, /* sh_name */
928            SHT_SYMTAB,
929            0, /* sh_flags */
930            0, /* sh_addr */
931            (Elf64_Off)(sizeof(header64)+sizeof(sectionHeaders64)), /* sh_offset */
932            (Elf64_Xword)(2*sizeof(Elf64_Sym)), /* sh_size */
933            3, /* sh_link=sect hdr index of .strtab */
934            1, /* sh_info=One greater than the symbol table index of the last
935                * local symbol (with STB_LOCAL). */
936            4, /* sh_addralign */
937            (Elf64_Xword)(sizeof(Elf64_Sym)) /* sh_entsize */
938        },
939        { /* .shstrtab */
940            9, /* sh_name */
941            SHT_STRTAB,
942            0, /* sh_flags */
943            0, /* sh_addr */
944            (Elf64_Off)(sizeof(header64)+sizeof(sectionHeaders64)+2*sizeof(Elf64_Sym)), /* sh_offset */
945            40, /* sh_size */
946            0, /* sh_link */
947            0, /* sh_info */
948            1, /* sh_addralign */
949            0 /* sh_entsize */
950        },
951        { /* .strtab */
952            19, /* sh_name */
953            SHT_STRTAB,
954            0, /* sh_flags */
955            0, /* sh_addr */
956            (Elf64_Off)(sizeof(header64)+sizeof(sectionHeaders64)+2*sizeof(Elf64_Sym)+40), /* sh_offset */
957            (Elf64_Xword)sizeof(entry), /* sh_size */
958            0, /* sh_link */
959            0, /* sh_info */
960            1, /* sh_addralign */
961            0 /* sh_entsize */
962        },
963        { /* .rodata */
964            27, /* sh_name */
965            SHT_PROGBITS,
966            SHF_ALLOC, /* sh_flags */
967            0, /* sh_addr */
968            (Elf64_Off)(sizeof(header64)+sizeof(sectionHeaders64)+2*sizeof(Elf64_Sym)+40+sizeof(entry)), /* sh_offset */
969            0, /* sh_size */
970            0, /* sh_link */
971            0, /* sh_info */
972            16, /* sh_addralign */
973            0 /* sh_entsize */
974        }
975    };
976
977    /*
978     * 64-bit symbol table
979     * careful: different order of items compared with Elf32_sym!
980     */
981    static Elf64_Sym symbols64[2]={
982        { /* STN_UNDEF */
983            0
984        },
985        { /* data entry point */
986            1, /* st_name */
987            ELF64_ST_INFO(STB_GLOBAL, STT_OBJECT),
988            0, /* st_other */
989            4, /* st_shndx=index of related section table entry */
990            0, /* st_value */
991            0 /* st_size */
992        }
993    };
994
995#endif /* U_ELF64 */
996
997    /* entry[] has a leading NUL */
998    entryOffset=1;
999
1000    /* in the common code, count entryLength from after the NUL */
1001    entryLengthOffset=1;
1002
1003    newSuffix=".o";
1004
1005#elif U_PLATFORM_HAS_WIN32_API
1006    struct {
1007        IMAGE_FILE_HEADER fileHeader;
1008        IMAGE_SECTION_HEADER sections[2];
1009        char linkerOptions[100];
1010    } objHeader;
1011    IMAGE_SYMBOL symbols[1];
1012    struct {
1013        DWORD sizeofLongNames;
1014        char longNames[100];
1015    } symbolNames;
1016
1017    /*
1018     * entry sometimes has a leading '_';
1019     * it is overwritten if entryOffset==0, depending on the target platform
1020     * (see the check for cpu below)
1021     */
1022    entry[0]='_';
1023
1024    newSuffix=".obj";
1025#else
1026#   error "Unknown platform for CAN_GENERATE_OBJECTS."
1027#endif
1028
1029    /* deal with options, files and the entry point name */
1030    getArchitecture(&cpu, &bits, &makeBigEndian, optMatchArch);
1031    printf("genccode: --match-arch cpu=%hu bits=%hu big-endian=%d\n", cpu, bits, makeBigEndian);
1032#if U_PLATFORM_HAS_WIN32_API
1033    if(cpu==IMAGE_FILE_MACHINE_I386) {
1034        entryOffset=1;
1035    }
1036#endif
1037
1038    in=T_FileStream_open(filename, "rb");
1039    if(in==NULL) {
1040        fprintf(stderr, "genccode: unable to open input file %s\n", filename);
1041        exit(U_FILE_ACCESS_ERROR);
1042    }
1043    size=T_FileStream_size(in);
1044
1045    getOutFilename(filename, destdir, buffer, entry+entryOffset, newSuffix, optFilename);
1046    if (outFilePath != NULL) {
1047        uprv_strcpy(outFilePath, buffer);
1048    }
1049
1050    if(optEntryPoint != NULL) {
1051        uprv_strcpy(entry+entryOffset, optEntryPoint);
1052        uprv_strcat(entry+entryOffset, "_dat");
1053    }
1054    /* turn dashes in the entry name into underscores */
1055    entryLength=(int32_t)uprv_strlen(entry+entryLengthOffset);
1056    for(i=0; i<entryLength; ++i) {
1057        if(entry[entryLengthOffset+i]=='-') {
1058            entry[entryLengthOffset+i]='_';
1059        }
1060    }
1061
1062    /* open the output file */
1063    out=T_FileStream_open(buffer, "wb");
1064    if(out==NULL) {
1065        fprintf(stderr, "genccode: unable to open output file %s\n", buffer);
1066        exit(U_FILE_ACCESS_ERROR);
1067    }
1068
1069#ifdef U_ELF
1070    if(bits==32) {
1071        header32.e_ident[EI_DATA]= makeBigEndian ? ELFDATA2MSB : ELFDATA2LSB;
1072        header32.e_machine=cpu;
1073
1074        /* 16-align .rodata in the .o file, just in case */
1075        paddingSize=sectionHeaders32[4].sh_offset & 0xf;
1076        if(paddingSize!=0) {
1077                paddingSize=0x10-paddingSize;
1078                sectionHeaders32[4].sh_offset+=paddingSize;
1079        }
1080
1081        sectionHeaders32[4].sh_size=(Elf32_Word)size;
1082
1083        symbols32[1].st_size=(Elf32_Word)size;
1084
1085        /* write .o headers */
1086        T_FileStream_write(out, &header32, (int32_t)sizeof(header32));
1087        T_FileStream_write(out, sectionHeaders32, (int32_t)sizeof(sectionHeaders32));
1088        T_FileStream_write(out, symbols32, (int32_t)sizeof(symbols32));
1089    } else /* bits==64 */ {
1090#ifdef U_ELF64
1091        header64.e_ident[EI_DATA]= makeBigEndian ? ELFDATA2MSB : ELFDATA2LSB;
1092        header64.e_machine=cpu;
1093
1094        /* 16-align .rodata in the .o file, just in case */
1095        paddingSize=sectionHeaders64[4].sh_offset & 0xf;
1096        if(paddingSize!=0) {
1097                paddingSize=0x10-paddingSize;
1098                sectionHeaders64[4].sh_offset+=paddingSize;
1099        }
1100
1101        sectionHeaders64[4].sh_size=(Elf64_Xword)size;
1102
1103        symbols64[1].st_size=(Elf64_Xword)size;
1104
1105        /* write .o headers */
1106        T_FileStream_write(out, &header64, (int32_t)sizeof(header64));
1107        T_FileStream_write(out, sectionHeaders64, (int32_t)sizeof(sectionHeaders64));
1108        T_FileStream_write(out, symbols64, (int32_t)sizeof(symbols64));
1109#endif
1110    }
1111
1112    T_FileStream_write(out, sectionStrings, (int32_t)sizeof(sectionStrings));
1113    T_FileStream_write(out, entry, (int32_t)sizeof(entry));
1114    if(paddingSize!=0) {
1115        T_FileStream_write(out, padding, paddingSize);
1116    }
1117#elif U_PLATFORM_HAS_WIN32_API
1118    /* populate the .obj headers */
1119    uprv_memset(&objHeader, 0, sizeof(objHeader));
1120    uprv_memset(&symbols, 0, sizeof(symbols));
1121    uprv_memset(&symbolNames, 0, sizeof(symbolNames));
1122
1123    /* write the linker export directive */
1124    uprv_strcpy(objHeader.linkerOptions, "-export:");
1125    length=8;
1126    uprv_strcpy(objHeader.linkerOptions+length, entry);
1127    length+=entryLength;
1128    uprv_strcpy(objHeader.linkerOptions+length, ",data ");
1129    length+=6;
1130
1131    /* set the file header */
1132    objHeader.fileHeader.Machine=cpu;
1133    objHeader.fileHeader.NumberOfSections=2;
1134    objHeader.fileHeader.TimeDateStamp=(DWORD)time(NULL);
1135    objHeader.fileHeader.PointerToSymbolTable=IMAGE_SIZEOF_FILE_HEADER+2*IMAGE_SIZEOF_SECTION_HEADER+length+size; /* start of symbol table */
1136    objHeader.fileHeader.NumberOfSymbols=1;
1137
1138    /* set the section for the linker options */
1139    uprv_strncpy((char *)objHeader.sections[0].Name, ".drectve", 8);
1140    objHeader.sections[0].SizeOfRawData=length;
1141    objHeader.sections[0].PointerToRawData=IMAGE_SIZEOF_FILE_HEADER+2*IMAGE_SIZEOF_SECTION_HEADER;
1142    objHeader.sections[0].Characteristics=IMAGE_SCN_LNK_INFO|IMAGE_SCN_LNK_REMOVE|IMAGE_SCN_ALIGN_1BYTES;
1143
1144    /* set the data section */
1145    uprv_strncpy((char *)objHeader.sections[1].Name, ".rdata", 6);
1146    objHeader.sections[1].SizeOfRawData=size;
1147    objHeader.sections[1].PointerToRawData=IMAGE_SIZEOF_FILE_HEADER+2*IMAGE_SIZEOF_SECTION_HEADER+length;
1148    objHeader.sections[1].Characteristics=IMAGE_SCN_CNT_INITIALIZED_DATA|IMAGE_SCN_ALIGN_16BYTES|IMAGE_SCN_MEM_READ;
1149
1150    /* set the symbol table */
1151    if(entryLength<=8) {
1152        uprv_strncpy((char *)symbols[0].N.ShortName, entry, entryLength);
1153        symbolNames.sizeofLongNames=4;
1154    } else {
1155        symbols[0].N.Name.Short=0;
1156        symbols[0].N.Name.Long=4;
1157        symbolNames.sizeofLongNames=4+entryLength+1;
1158        uprv_strcpy(symbolNames.longNames, entry);
1159    }
1160    symbols[0].SectionNumber=2;
1161    symbols[0].StorageClass=IMAGE_SYM_CLASS_EXTERNAL;
1162
1163    /* write the file header and the linker options section */
1164    T_FileStream_write(out, &objHeader, objHeader.sections[1].PointerToRawData);
1165#else
1166#   error "Unknown platform for CAN_GENERATE_OBJECTS."
1167#endif
1168
1169    /* copy the data file into section 2 */
1170    for(;;) {
1171        length=T_FileStream_read(in, buffer, sizeof(buffer));
1172        if(length==0) {
1173            break;
1174        }
1175        T_FileStream_write(out, buffer, (int32_t)length);
1176    }
1177
1178#if U_PLATFORM_HAS_WIN32_API
1179    /* write the symbol table */
1180    T_FileStream_write(out, symbols, IMAGE_SIZEOF_SYMBOL);
1181    T_FileStream_write(out, &symbolNames, symbolNames.sizeofLongNames);
1182#endif
1183
1184    if(T_FileStream_error(in)) {
1185        fprintf(stderr, "genccode: file read error while generating from file %s\n", filename);
1186        exit(U_FILE_ACCESS_ERROR);
1187    }
1188
1189    if(T_FileStream_error(out)) {
1190        fprintf(stderr, "genccode: file write error while generating from file %s\n", filename);
1191        exit(U_FILE_ACCESS_ERROR);
1192    }
1193
1194    T_FileStream_close(out);
1195    T_FileStream_close(in);
1196}
1197#endif
1198