1/******************************************************************************
2 *   Copyright (C) 2009-2013, International Business Machines
3 *   Corporation and others.  All Rights Reserved.
4 *******************************************************************************
5 */
6#include "unicode/utypes.h"
7
8#if U_PLATFORM_HAS_WIN32_API
9#   define VC_EXTRALEAN
10#   define WIN32_LEAN_AND_MEAN
11#   define NOUSER
12#   define NOSERVICE
13#   define NOIME
14#   define NOMCX
15#include <windows.h>
16#include <time.h>
17#   ifdef __GNUC__
18#       define WINDOWS_WITH_GNUC
19#   endif
20#endif
21
22#if U_PLATFORM_IS_LINUX_BASED && U_HAVE_ELF_H
23#   define U_ELF
24#endif
25
26#ifdef U_ELF
27#   include <elf.h>
28#   if defined(ELFCLASS64)
29#       define U_ELF64
30#   endif
31    /* Old elf.h headers may not have EM_X86_64, or have EM_X8664 instead. */
32#   ifndef EM_X86_64
33#       define EM_X86_64 62
34#   endif
35#   define ICU_ENTRY_OFFSET 0
36#endif
37
38#include <stdio.h>
39#include <stdlib.h>
40#include "unicode/putil.h"
41#include "cmemory.h"
42#include "cstring.h"
43#include "filestrm.h"
44#include "toolutil.h"
45#include "unicode/uclean.h"
46#include "uoptions.h"
47#include "pkg_genc.h"
48
49#define MAX_COLUMN ((uint32_t)(0xFFFFFFFFU))
50
51#define HEX_0X 0 /*  0x1234 */
52#define HEX_0H 1 /*  01234h */
53
54/* prototypes --------------------------------------------------------------- */
/*
 * Derives the output file path (written to outFilename) and the entry point
 * name (written to entryName) from the input file name, the optional
 * destination directory and the optional output base name. newSuffix
 * (for example ".c" or ".s") is appended to the output path.
 */
static void
getOutFilename(const char *inFilename, const char *destdir, char *outFilename, char *entryName, const char *newSuffix, const char *optFilename);

/*
 * Writes one byte as a decimal number, preceded by a comma and/or line break
 * as needed, and returns the updated column counter.
 */
static uint32_t
write8(FileStream *out, uint8_t byte, uint32_t column);

/*
 * Writes one 32-bit value in the notation of the selected assembler,
 * preceded by a comma or a new data directive as needed, and returns the
 * updated column counter.
 */
static uint32_t
write32(FileStream *out, uint32_t byte, uint32_t column);

#if U_PLATFORM == U_PF_OS400
/*
 * Writes one byte as an escape sequence inside a C string literal and
 * returns the updated column counter.
 */
static uint32_t
write8str(FileStream *out, uint8_t byte, uint32_t column);
#endif
68/* -------------------------------------------------------------------------- */
69
70/*
71Creating Template Files for New Platforms
72
73Let the cc compiler help you get started.
74Compile this program
75    const unsigned int x[5] = {1, 2, 0xdeadbeef, 0xffffffff, 16};
76with the -S option to produce assembly output.
77
78For example, this will generate array.s:
79gcc -S array.c
80
81This will produce a .s file that may look like this:
82
83    .file   "array.c"
84    .version        "01.01"
85gcc2_compiled.:
86    .globl x
87    .section        .rodata
88    .align 4
89    .type    x,@object
90    .size    x,20
91x:
92    .long   1
93    .long   2
94    .long   -559038737
95    .long   -1
96    .long   16
97    .ident  "GCC: (GNU) 2.96 20000731 (Red Hat Linux 7.1 2.96-85)"
98
99which gives a starting point that will compile, and can be transformed
100to become the template, generally with some consulting of as docs and
101some experimentation.
102
103If you want ICU to automatically use this assembly, you should
104specify "GENCCODE_ASSEMBLY=-a name" in the specific config/mh-* file,
105where the name is the compiler or platform that you used in this
106assemblyHeader data structure.
107*/
/*
 * Table of the supported assembler dialects, looked up by name in
 * checkAssemblyHeaderName(). writeAssemblyCode() formats header and footer
 * with sprintf(), supplying the entry point name for every %s, and
 * write32() starts each new line of data with beginLine.
 */
static const struct AssemblyType {
    const char *name;      /* dialect name as given on the command line */
    const char *header;    /* printf-style format written before the data; each %s is the entry point name */
    const char *beginLine; /* data directive that starts every line of 32-bit values */
    const char *footer;    /* printf-style format written after the data; each %s is the entry point name */
    int8_t      hexType; /* HEX_0X or HEX_0H */
} assemblyHeader[] = {
    // For gcc assemblers, the meaning of .align changes depending on the
    // hardware, so we use .balign 16 which always means 16 bytes.
    // https://sourceware.org/binutils/docs/as/Pseudo-Ops.html
    {"gcc",
        ".globl %s\n"
        "\t.section .note.GNU-stack,\"\",%%progbits\n"
        "\t.section .rodata\n"
        "\t.balign 16\n"
        "\t.type %s,%%object\n"
        "%s:\n\n",

        ".long ","",HEX_0X
    },
    {"gcc-darwin",
        /*"\t.section __TEXT,__text,regular,pure_instructions\n"
        "\t.section __TEXT,__picsymbolstub1,symbol_stubs,pure_instructions,32\n"*/
        ".globl _%s\n"
        "\t.data\n"
        "\t.const\n"
        "\t.balign 16\n"
        "_%s:\n\n",

        ".long ","",HEX_0X
    },
    {"gcc-cygwin",
        ".globl _%s\n"
        "\t.section .rodata\n"
        "\t.balign 16\n"
        "_%s:\n\n",

        ".long ","",HEX_0X
    },
    {"gcc-mingw64",
        ".globl %s\n"
        "\t.section .rodata\n"
        "\t.balign 16\n"
        "%s:\n\n",

        ".long ","",HEX_0X
    },
// 16 bytes alignment.
// http://docs.oracle.com/cd/E19641-01/802-1947/802-1947.pdf
    {"sun",
        "\t.section \".rodata\"\n"
        "\t.align   16\n"
        ".globl     %s\n"
        "%s:\n",

        ".word ","",HEX_0X
    },
// 16 bytes alignment for sun-x86.
// http://docs.oracle.com/cd/E19963-01/html/821-1608/eoiyg.html
    {"sun-x86",
        "Drodata.rodata:\n"
        "\t.type   Drodata.rodata,@object\n"
        "\t.size   Drodata.rodata,0\n"
        "\t.globl  %s\n"
        "\t.align  16\n"
        "%s:\n",

        ".4byte ","",HEX_0X
    },
// 1<<4 bit alignment for aix.
// http://pic.dhe.ibm.com/infocenter/aix/v6r1/index.jsp?topic=%2Fcom.ibm.aix.aixassem%2Fdoc%2Falangref%2Fidalangref_csect_pseudoop.htm
    {"xlc",
        ".globl %s{RO}\n"
        "\t.toc\n"
        "%s:\n"
        "\t.csect %s{RO}, 4\n",

        ".long ","",HEX_0X
    },
    {"aCC-ia64",
        "\t.file   \"%s.s\"\n"
        "\t.type   %s,@object\n"
        "\t.global %s\n"
        "\t.secalias .abe$0.rodata, \".rodata\"\n"
        "\t.section .abe$0.rodata = \"a\", \"progbits\"\n"
        "\t.align  16\n"
        "%s::\t",

        "data4 ","",HEX_0X
    },
    {"aCC-parisc",
        "\t.SPACE  $TEXT$\n"
        "\t.SUBSPA $LIT$\n"
        "%s\n"
        "\t.EXPORT %s\n"
        "\t.ALIGN  16\n",

        ".WORD ","",HEX_0X
    },
// align 16 bytes
//  http://msdn.microsoft.com/en-us/library/dwa9fwef.aspx
    /* the only entry with a non-empty footer and with the 01234h number notation */
    { "masm",
      "\tTITLE %s\n"
      "; generated by genccode\n"
      ".386\n"
      ".model flat\n"
      "\tPUBLIC _%s\n"
      "ICUDATA_%s\tSEGMENT READONLY PARA PUBLIC FLAT 'DATA'\n"
      "\tALIGN 16\n"
      "_%s\tLABEL DWORD\n",
      "\tDWORD ","\nICUDATA_%s\tENDS\n\tEND\n",HEX_0H
    }
};
221
/*
 * Index of the selected entry in assemblyHeader[]; -1 until
 * checkAssemblyHeaderName() has matched a name (it is reset to -1 at the
 * start of every call).
 */
static int32_t assemblyHeaderIndex = -1;
/* Number notation (HEX_0X or HEX_0H) of the selected entry, used by write32(). */
static int32_t hexType = HEX_0X;
224
225U_CAPI UBool U_EXPORT2
226checkAssemblyHeaderName(const char* optAssembly) {
227    int32_t idx;
228    assemblyHeaderIndex = -1;
229    for (idx = 0; idx < (int32_t)(sizeof(assemblyHeader)/sizeof(assemblyHeader[0])); idx++) {
230        if (uprv_strcmp(optAssembly, assemblyHeader[idx].name) == 0) {
231            assemblyHeaderIndex = idx;
232            hexType = assemblyHeader[idx].hexType; /* set the hex type */
233            return TRUE;
234        }
235    }
236
237    return FALSE;
238}
239
240
241U_CAPI void U_EXPORT2
242printAssemblyHeadersToStdErr(void) {
243    int32_t idx;
244    fprintf(stderr, "%s", assemblyHeader[0].name);
245    for (idx = 1; idx < (int32_t)(sizeof(assemblyHeader)/sizeof(assemblyHeader[0])); idx++) {
246        fprintf(stderr, ", %s", assemblyHeader[idx].name);
247    }
248    fprintf(stderr,
249        ")\n");
250}
251
252U_CAPI void U_EXPORT2
253writeAssemblyCode(const char *filename, const char *destdir, const char *optEntryPoint, const char *optFilename, char *outFilePath) {
254    uint32_t column = MAX_COLUMN;
255    char entry[64];
256    uint32_t buffer[1024];
257    char *bufferStr = (char *)buffer;
258    FileStream *in, *out;
259    size_t i, length;
260
261    in=T_FileStream_open(filename, "rb");
262    if(in==NULL) {
263        fprintf(stderr, "genccode: unable to open input file %s\n", filename);
264        exit(U_FILE_ACCESS_ERROR);
265    }
266
267    getOutFilename(filename, destdir, bufferStr, entry, ".s", optFilename);
268    out=T_FileStream_open(bufferStr, "w");
269    if(out==NULL) {
270        fprintf(stderr, "genccode: unable to open output file %s\n", bufferStr);
271        exit(U_FILE_ACCESS_ERROR);
272    }
273
274    if (outFilePath != NULL) {
275        uprv_strcpy(outFilePath, bufferStr);
276    }
277
278#ifdef WINDOWS_WITH_GNUC
279    /* Need to fix the file seperator character when using MinGW. */
280    swapFileSepChar(outFilePath, U_FILE_SEP_CHAR, '/');
281#endif
282
283    if(optEntryPoint != NULL) {
284        uprv_strcpy(entry, optEntryPoint);
285        uprv_strcat(entry, "_dat");
286    }
287
288    /* turn dashes or dots in the entry name into underscores */
289    length=uprv_strlen(entry);
290    for(i=0; i<length; ++i) {
291        if(entry[i]=='-' || entry[i]=='.') {
292            entry[i]='_';
293        }
294    }
295
296    sprintf(bufferStr, assemblyHeader[assemblyHeaderIndex].header,
297        entry, entry, entry, entry,
298        entry, entry, entry, entry);
299    T_FileStream_writeLine(out, bufferStr);
300    T_FileStream_writeLine(out, assemblyHeader[assemblyHeaderIndex].beginLine);
301
302    for(;;) {
303        length=T_FileStream_read(in, buffer, sizeof(buffer));
304        if(length==0) {
305            break;
306        }
307        if (length != sizeof(buffer)) {
308            /* pad with extra 0's when at the end of the file */
309            for(i=0; i < (length % sizeof(uint32_t)); ++i) {
310                buffer[length+i] = 0;
311            }
312        }
313        for(i=0; i<(length/sizeof(buffer[0])); i++) {
314            column = write32(out, buffer[i], column);
315        }
316    }
317
318    T_FileStream_writeLine(out, "\n");
319
320    sprintf(bufferStr, assemblyHeader[assemblyHeaderIndex].footer,
321        entry, entry, entry, entry,
322        entry, entry, entry, entry);
323    T_FileStream_writeLine(out, bufferStr);
324
325    if(T_FileStream_error(in)) {
326        fprintf(stderr, "genccode: file read error while generating from file %s\n", filename);
327        exit(U_FILE_ACCESS_ERROR);
328    }
329
330    if(T_FileStream_error(out)) {
331        fprintf(stderr, "genccode: file write error while generating from file %s\n", filename);
332        exit(U_FILE_ACCESS_ERROR);
333    }
334
335    T_FileStream_close(out);
336    T_FileStream_close(in);
337}
338
339U_CAPI void U_EXPORT2
340writeCCode(const char *filename, const char *destdir, const char *optName, const char *optFilename, char *outFilePath) {
341    uint32_t column = MAX_COLUMN;
342    char buffer[4096], entry[64];
343    FileStream *in, *out;
344    size_t i, length;
345
346    in=T_FileStream_open(filename, "rb");
347    if(in==NULL) {
348        fprintf(stderr, "genccode: unable to open input file %s\n", filename);
349        exit(U_FILE_ACCESS_ERROR);
350    }
351
352    if(optName != NULL) { /* prepend  'icudt28_' */
353      strcpy(entry, optName);
354      strcat(entry, "_");
355    } else {
356      entry[0] = 0;
357    }
358
359    getOutFilename(filename, destdir, buffer, entry+uprv_strlen(entry), ".c", optFilename);
360    if (outFilePath != NULL) {
361        uprv_strcpy(outFilePath, buffer);
362    }
363    out=T_FileStream_open(buffer, "w");
364    if(out==NULL) {
365        fprintf(stderr, "genccode: unable to open output file %s\n", buffer);
366        exit(U_FILE_ACCESS_ERROR);
367    }
368
369    /* turn dashes or dots in the entry name into underscores */
370    length=uprv_strlen(entry);
371    for(i=0; i<length; ++i) {
372        if(entry[i]=='-' || entry[i]=='.') {
373            entry[i]='_';
374        }
375    }
376
377#if U_PLATFORM == U_PF_OS400
378    /*
379    TODO: Fix this once the compiler implements this feature. Keep in sync with udatamem.c
380
381    This is here because this platform can't currently put
382    const data into the read-only pages of an object or
383    shared library (service program). Only strings are allowed in read-only
384    pages, so we use char * strings to store the data.
385
386    In order to prevent the beginning of the data from ever matching the
387    magic numbers we must still use the initial double.
388    [grhoten 4/24/2003]
389    */
390    sprintf(buffer,
391        "#ifndef IN_GENERATED_CCODE\n"
392        "#define IN_GENERATED_CCODE\n"
393        "#define U_DISABLE_RENAMING 1\n"
394        "#include \"unicode/umachine.h\"\n"
395        "#endif\n"
396        "U_CDECL_BEGIN\n"
397        "const struct {\n"
398        "    double bogus;\n"
399        "    const char *bytes; \n"
400        "} %s={ 0.0, \n",
401        entry);
402    T_FileStream_writeLine(out, buffer);
403
404    for(;;) {
405        length=T_FileStream_read(in, buffer, sizeof(buffer));
406        if(length==0) {
407            break;
408        }
409        for(i=0; i<length; ++i) {
410            column = write8str(out, (uint8_t)buffer[i], column);
411        }
412    }
413
414    T_FileStream_writeLine(out, "\"\n};\nU_CDECL_END\n");
415#else
416    /* Function renaming shouldn't be done in data */
417    sprintf(buffer,
418        "#ifndef IN_GENERATED_CCODE\n"
419        "#define IN_GENERATED_CCODE\n"
420        "#define U_DISABLE_RENAMING 1\n"
421        "#include \"unicode/umachine.h\"\n"
422        "#endif\n"
423        "U_CDECL_BEGIN\n"
424        "const struct {\n"
425        "    double bogus;\n"
426        "    uint8_t bytes[%ld]; \n"
427        "} %s={ 0.0, {\n",
428        (long)T_FileStream_size(in), entry);
429    T_FileStream_writeLine(out, buffer);
430
431    for(;;) {
432        length=T_FileStream_read(in, buffer, sizeof(buffer));
433        if(length==0) {
434            break;
435        }
436        for(i=0; i<length; ++i) {
437            column = write8(out, (uint8_t)buffer[i], column);
438        }
439    }
440
441    T_FileStream_writeLine(out, "\n}\n};\nU_CDECL_END\n");
442#endif
443
444    if(T_FileStream_error(in)) {
445        fprintf(stderr, "genccode: file read error while generating from file %s\n", filename);
446        exit(U_FILE_ACCESS_ERROR);
447    }
448
449    if(T_FileStream_error(out)) {
450        fprintf(stderr, "genccode: file write error while generating from file %s\n", filename);
451        exit(U_FILE_ACCESS_ERROR);
452    }
453
454    T_FileStream_close(out);
455    T_FileStream_close(in);
456}
457
458static uint32_t
459write32(FileStream *out, uint32_t bitField, uint32_t column) {
460    int32_t i;
461    char bitFieldStr[64]; /* This is more bits than needed for a 32-bit number */
462    char *s = bitFieldStr;
463    uint8_t *ptrIdx = (uint8_t *)&bitField;
464    static const char hexToStr[16] = {
465        '0','1','2','3',
466        '4','5','6','7',
467        '8','9','A','B',
468        'C','D','E','F'
469    };
470
471    /* write the value, possibly with comma and newline */
472    if(column==MAX_COLUMN) {
473        /* first byte */
474        column=1;
475    } else if(column<32) {
476        *(s++)=',';
477        ++column;
478    } else {
479        *(s++)='\n';
480        uprv_strcpy(s, assemblyHeader[assemblyHeaderIndex].beginLine);
481        s+=uprv_strlen(s);
482        column=1;
483    }
484
485    if (bitField < 10) {
486        /* It's a small number. Don't waste the space for 0x */
487        *(s++)=hexToStr[bitField];
488    }
489    else {
490        int seenNonZero = 0; /* This is used to remove leading zeros */
491
492        if(hexType==HEX_0X) {
493         *(s++)='0';
494         *(s++)='x';
495        } else if(hexType==HEX_0H) {
496         *(s++)='0';
497        }
498
499        /* This creates a 32-bit field */
500#if U_IS_BIG_ENDIAN
501        for (i = 0; i < sizeof(uint32_t); i++)
502#else
503        for (i = sizeof(uint32_t)-1; i >= 0 ; i--)
504#endif
505        {
506            uint8_t value = ptrIdx[i];
507            if (value || seenNonZero) {
508                *(s++)=hexToStr[value>>4];
509                *(s++)=hexToStr[value&0xF];
510                seenNonZero = 1;
511            }
512        }
513        if(hexType==HEX_0H) {
514         *(s++)='h';
515        }
516    }
517
518    *(s++)=0;
519    T_FileStream_writeLine(out, bitFieldStr);
520    return column;
521}
522
523static uint32_t
524write8(FileStream *out, uint8_t byte, uint32_t column) {
525    char s[4];
526    int i=0;
527
528    /* convert the byte value to a string */
529    if(byte>=100) {
530        s[i++]=(char)('0'+byte/100);
531        byte%=100;
532    }
533    if(i>0 || byte>=10) {
534        s[i++]=(char)('0'+byte/10);
535        byte%=10;
536    }
537    s[i++]=(char)('0'+byte);
538    s[i]=0;
539
540    /* write the value, possibly with comma and newline */
541    if(column==MAX_COLUMN) {
542        /* first byte */
543        column=1;
544    } else if(column<16) {
545        T_FileStream_writeLine(out, ",");
546        ++column;
547    } else {
548        T_FileStream_writeLine(out, ",\n");
549        column=1;
550    }
551    T_FileStream_writeLine(out, s);
552    return column;
553}
554
555#if U_PLATFORM == U_PF_OS400
556static uint32_t
557write8str(FileStream *out, uint8_t byte, uint32_t column) {
558    char s[8];
559
560    if (byte > 7)
561        sprintf(s, "\\x%X", byte);
562    else
563        sprintf(s, "\\%X", byte);
564
565    /* write the value, possibly with comma and newline */
566    if(column==MAX_COLUMN) {
567        /* first byte */
568        column=1;
569        T_FileStream_writeLine(out, "\"");
570    } else if(column<24) {
571        ++column;
572    } else {
573        T_FileStream_writeLine(out, "\"\n\"");
574        column=1;
575    }
576    T_FileStream_writeLine(out, s);
577    return column;
578}
579#endif
580
581static void
582getOutFilename(const char *inFilename, const char *destdir, char *outFilename, char *entryName, const char *newSuffix, const char *optFilename) {
583    const char *basename=findBasename(inFilename), *suffix=uprv_strrchr(basename, '.');
584
585    /* copy path */
586    if(destdir!=NULL && *destdir!=0) {
587        do {
588            *outFilename++=*destdir++;
589        } while(*destdir!=0);
590        if(*(outFilename-1)!=U_FILE_SEP_CHAR) {
591            *outFilename++=U_FILE_SEP_CHAR;
592        }
593        inFilename=basename;
594    } else {
595        while(inFilename<basename) {
596            *outFilename++=*inFilename++;
597        }
598    }
599
600    if(suffix==NULL) {
601        /* the filename does not have a suffix */
602        uprv_strcpy(entryName, inFilename);
603        if(optFilename != NULL) {
604          uprv_strcpy(outFilename, optFilename);
605        } else {
606          uprv_strcpy(outFilename, inFilename);
607        }
608        uprv_strcat(outFilename, newSuffix);
609    } else {
610        char *saveOutFilename = outFilename;
611        /* copy basename */
612        while(inFilename<suffix) {
613            if(*inFilename=='-') {
614                /* iSeries cannot have '-' in the .o objects. */
615                *outFilename++=*entryName++='_';
616                inFilename++;
617            }
618            else {
619                *outFilename++=*entryName++=*inFilename++;
620            }
621        }
622
623        /* replace '.' by '_' */
624        *outFilename++=*entryName++='_';
625        ++inFilename;
626
627        /* copy suffix */
628        while(*inFilename!=0) {
629            *outFilename++=*entryName++=*inFilename++;
630        }
631
632        *entryName=0;
633
634        if(optFilename != NULL) {
635            uprv_strcpy(saveOutFilename, optFilename);
636            uprv_strcat(saveOutFilename, newSuffix);
637        } else {
638            /* add ".c" */
639            uprv_strcpy(outFilename, newSuffix);
640        }
641    }
642}
643
644#ifdef CAN_GENERATE_OBJECTS
645static void
646getArchitecture(uint16_t *pCPU, uint16_t *pBits, UBool *pIsBigEndian, const char *optMatchArch) {
647    union {
648        char        bytes[2048];
649#ifdef U_ELF
650        Elf32_Ehdr  header32;
651        /* Elf32_Ehdr and ELF64_Ehdr are identical for the necessary fields. */
652#elif U_PLATFORM_HAS_WIN32_API
653        IMAGE_FILE_HEADER header;
654#endif
655    } buffer;
656
657    const char *filename;
658    FileStream *in;
659    int32_t length;
660
661#ifdef U_ELF
662
663#elif U_PLATFORM_HAS_WIN32_API
664    const IMAGE_FILE_HEADER *pHeader;
665#else
666#   error "Unknown platform for CAN_GENERATE_OBJECTS."
667#endif
668
669    if(optMatchArch != NULL) {
670        filename=optMatchArch;
671    } else {
672        /* set defaults */
673#ifdef U_ELF
674        /* set EM_386 because elf.h does not provide better defaults */
675        *pCPU=EM_386;
676        *pBits=32;
677        *pIsBigEndian=(UBool)(U_IS_BIG_ENDIAN ? ELFDATA2MSB : ELFDATA2LSB);
678#elif U_PLATFORM_HAS_WIN32_API
679/* _M_IA64 should be defined in windows.h */
680#   if defined(_M_IA64)
681        *pCPU=IMAGE_FILE_MACHINE_IA64;
682#   elif defined(_M_AMD64)
683        *pCPU=IMAGE_FILE_MACHINE_AMD64;
684#   else
685        *pCPU=IMAGE_FILE_MACHINE_I386;
686#   endif
687        *pBits= *pCPU==IMAGE_FILE_MACHINE_I386 ? 32 : 64;
688        *pIsBigEndian=FALSE;
689#else
690#   error "Unknown platform for CAN_GENERATE_OBJECTS."
691#endif
692        return;
693    }
694
695    in=T_FileStream_open(filename, "rb");
696    if(in==NULL) {
697        fprintf(stderr, "genccode: unable to open match-arch file %s\n", filename);
698        exit(U_FILE_ACCESS_ERROR);
699    }
700    length=T_FileStream_read(in, buffer.bytes, sizeof(buffer.bytes));
701
702#ifdef U_ELF
703    if(length<sizeof(Elf32_Ehdr)) {
704        fprintf(stderr, "genccode: match-arch file %s is too short\n", filename);
705        exit(U_UNSUPPORTED_ERROR);
706    }
707    if(
708        buffer.header32.e_ident[0]!=ELFMAG0 ||
709        buffer.header32.e_ident[1]!=ELFMAG1 ||
710        buffer.header32.e_ident[2]!=ELFMAG2 ||
711        buffer.header32.e_ident[3]!=ELFMAG3 ||
712        buffer.header32.e_ident[EI_CLASS]<ELFCLASS32 || buffer.header32.e_ident[EI_CLASS]>ELFCLASS64
713    ) {
714        fprintf(stderr, "genccode: match-arch file %s is not an ELF object file, or not supported\n", filename);
715        exit(U_UNSUPPORTED_ERROR);
716    }
717
718    *pBits= buffer.header32.e_ident[EI_CLASS]==ELFCLASS32 ? 32 : 64; /* only 32 or 64: see check above */
719#ifdef U_ELF64
720    if(*pBits!=32 && *pBits!=64) {
721        fprintf(stderr, "genccode: currently only supports 32-bit and 64-bit ELF format\n");
722        exit(U_UNSUPPORTED_ERROR);
723    }
724#else
725    if(*pBits!=32) {
726        fprintf(stderr, "genccode: built with elf.h missing 64-bit definitions\n");
727        exit(U_UNSUPPORTED_ERROR);
728    }
729#endif
730
731    *pIsBigEndian=(UBool)(buffer.header32.e_ident[EI_DATA]==ELFDATA2MSB);
732    if(*pIsBigEndian!=U_IS_BIG_ENDIAN) {
733        fprintf(stderr, "genccode: currently only same-endianness ELF formats are supported\n");
734        exit(U_UNSUPPORTED_ERROR);
735    }
736    /* TODO: Support byte swapping */
737
738    *pCPU=buffer.header32.e_machine;
739#elif U_PLATFORM_HAS_WIN32_API
740    if(length<sizeof(IMAGE_FILE_HEADER)) {
741        fprintf(stderr, "genccode: match-arch file %s is too short\n", filename);
742        exit(U_UNSUPPORTED_ERROR);
743    }
744    /* TODO: Use buffer.header.  Keep aliasing legal.  */
745    pHeader=(const IMAGE_FILE_HEADER *)buffer.bytes;
746    *pCPU=pHeader->Machine;
747    /*
748     * The number of bits is implicit with the Machine value.
749     * *pBits is ignored in the calling code, so this need not be precise.
750     */
751    *pBits= *pCPU==IMAGE_FILE_MACHINE_I386 ? 32 : 64;
752    /* Windows always runs on little-endian CPUs. */
753    *pIsBigEndian=FALSE;
754#else
755#   error "Unknown platform for CAN_GENERATE_OBJECTS."
756#endif
757
758    T_FileStream_close(in);
759}
760
761U_CAPI void U_EXPORT2
762writeObjectCode(const char *filename, const char *destdir, const char *optEntryPoint, const char *optMatchArch, const char *optFilename, char *outFilePath) {
763    /* common variables */
764    char buffer[4096], entry[40]={ 0 };
765    FileStream *in, *out;
766    const char *newSuffix;
767    int32_t i, entryLength, length, size, entryOffset=0, entryLengthOffset=0;
768
769    uint16_t cpu, bits;
770    UBool makeBigEndian;
771
772    /* platform-specific variables and initialization code */
773#ifdef U_ELF
774    /* 32-bit Elf file header */
775    static Elf32_Ehdr header32={
776        {
777            /* e_ident[] */
778            ELFMAG0, ELFMAG1, ELFMAG2, ELFMAG3,
779            ELFCLASS32,
780            U_IS_BIG_ENDIAN ? ELFDATA2MSB : ELFDATA2LSB,
781            EV_CURRENT /* EI_VERSION */
782        },
783        ET_REL,
784        EM_386,
785        EV_CURRENT, /* e_version */
786        0, /* e_entry */
787        0, /* e_phoff */
788        (Elf32_Off)sizeof(Elf32_Ehdr), /* e_shoff */
789        0, /* e_flags */
790        (Elf32_Half)sizeof(Elf32_Ehdr), /* eh_size */
791        0, /* e_phentsize */
792        0, /* e_phnum */
793        (Elf32_Half)sizeof(Elf32_Shdr), /* e_shentsize */
794        5, /* e_shnum */
795        2 /* e_shstrndx */
796    };
797
798    /* 32-bit Elf section header table */
799    static Elf32_Shdr sectionHeaders32[5]={
800        { /* SHN_UNDEF */
801            0
802        },
803        { /* .symtab */
804            1, /* sh_name */
805            SHT_SYMTAB,
806            0, /* sh_flags */
807            0, /* sh_addr */
808            (Elf32_Off)(sizeof(header32)+sizeof(sectionHeaders32)), /* sh_offset */
809            (Elf32_Word)(2*sizeof(Elf32_Sym)), /* sh_size */
810            3, /* sh_link=sect hdr index of .strtab */
811            1, /* sh_info=One greater than the symbol table index of the last
812                * local symbol (with STB_LOCAL). */
813            4, /* sh_addralign */
814            (Elf32_Word)(sizeof(Elf32_Sym)) /* sh_entsize */
815        },
816        { /* .shstrtab */
817            9, /* sh_name */
818            SHT_STRTAB,
819            0, /* sh_flags */
820            0, /* sh_addr */
821            (Elf32_Off)(sizeof(header32)+sizeof(sectionHeaders32)+2*sizeof(Elf32_Sym)), /* sh_offset */
822            40, /* sh_size */
823            0, /* sh_link */
824            0, /* sh_info */
825            1, /* sh_addralign */
826            0 /* sh_entsize */
827        },
828        { /* .strtab */
829            19, /* sh_name */
830            SHT_STRTAB,
831            0, /* sh_flags */
832            0, /* sh_addr */
833            (Elf32_Off)(sizeof(header32)+sizeof(sectionHeaders32)+2*sizeof(Elf32_Sym)+40), /* sh_offset */
834            (Elf32_Word)sizeof(entry), /* sh_size */
835            0, /* sh_link */
836            0, /* sh_info */
837            1, /* sh_addralign */
838            0 /* sh_entsize */
839        },
840        { /* .rodata */
841            27, /* sh_name */
842            SHT_PROGBITS,
843            SHF_ALLOC, /* sh_flags */
844            0, /* sh_addr */
845            (Elf32_Off)(sizeof(header32)+sizeof(sectionHeaders32)+2*sizeof(Elf32_Sym)+40+sizeof(entry)), /* sh_offset */
846            0, /* sh_size */
847            0, /* sh_link */
848            0, /* sh_info */
849            16, /* sh_addralign */
850            0 /* sh_entsize */
851        }
852    };
853
854    /* symbol table */
855    static Elf32_Sym symbols32[2]={
856        { /* STN_UNDEF */
857            0
858        },
859        { /* data entry point */
860            1, /* st_name */
861            0, /* st_value */
862            0, /* st_size */
863            ELF64_ST_INFO(STB_GLOBAL, STT_OBJECT),
864            0, /* st_other */
865            4 /* st_shndx=index of related section table entry */
866        }
867    };
868
869    /* section header string table, with decimal string offsets */
870    static const char sectionStrings[40]=
871        /*  0 */ "\0"
872        /*  1 */ ".symtab\0"
873        /*  9 */ ".shstrtab\0"
874        /* 19 */ ".strtab\0"
875        /* 27 */ ".rodata\0"
876        /* 35 */ "\0\0\0\0"; /* contains terminating NUL */
877        /* 40: padded to multiple of 8 bytes */
878
879    /*
880     * Use entry[] for the string table which will contain only the
881     * entry point name.
882     * entry[0] must be 0 (NUL)
883     * The entry point name can be up to 38 characters long (sizeof(entry)-2).
884     */
885
886    /* 16-align .rodata in the .o file, just in case */
887    static const char padding[16]={ 0 };
888    int32_t paddingSize;
889
890#ifdef U_ELF64
891    /* 64-bit Elf file header */
892    static Elf64_Ehdr header64={
893        {
894            /* e_ident[] */
895            ELFMAG0, ELFMAG1, ELFMAG2, ELFMAG3,
896            ELFCLASS64,
897            U_IS_BIG_ENDIAN ? ELFDATA2MSB : ELFDATA2LSB,
898            EV_CURRENT /* EI_VERSION */
899        },
900        ET_REL,
901        EM_X86_64,
902        EV_CURRENT, /* e_version */
903        0, /* e_entry */
904        0, /* e_phoff */
905        (Elf64_Off)sizeof(Elf64_Ehdr), /* e_shoff */
906        0, /* e_flags */
907        (Elf64_Half)sizeof(Elf64_Ehdr), /* eh_size */
908        0, /* e_phentsize */
909        0, /* e_phnum */
910        (Elf64_Half)sizeof(Elf64_Shdr), /* e_shentsize */
911        5, /* e_shnum */
912        2 /* e_shstrndx */
913    };
914
915    /* 64-bit Elf section header table */
916    static Elf64_Shdr sectionHeaders64[5]={
917        { /* SHN_UNDEF */
918            0
919        },
920        { /* .symtab */
921            1, /* sh_name */
922            SHT_SYMTAB,
923            0, /* sh_flags */
924            0, /* sh_addr */
925            (Elf64_Off)(sizeof(header64)+sizeof(sectionHeaders64)), /* sh_offset */
926            (Elf64_Xword)(2*sizeof(Elf64_Sym)), /* sh_size */
927            3, /* sh_link=sect hdr index of .strtab */
928            1, /* sh_info=One greater than the symbol table index of the last
929                * local symbol (with STB_LOCAL). */
930            4, /* sh_addralign */
931            (Elf64_Xword)(sizeof(Elf64_Sym)) /* sh_entsize */
932        },
933        { /* .shstrtab */
934            9, /* sh_name */
935            SHT_STRTAB,
936            0, /* sh_flags */
937            0, /* sh_addr */
938            (Elf64_Off)(sizeof(header64)+sizeof(sectionHeaders64)+2*sizeof(Elf64_Sym)), /* sh_offset */
939            40, /* sh_size */
940            0, /* sh_link */
941            0, /* sh_info */
942            1, /* sh_addralign */
943            0 /* sh_entsize */
944        },
945        { /* .strtab */
946            19, /* sh_name */
947            SHT_STRTAB,
948            0, /* sh_flags */
949            0, /* sh_addr */
950            (Elf64_Off)(sizeof(header64)+sizeof(sectionHeaders64)+2*sizeof(Elf64_Sym)+40), /* sh_offset */
951            (Elf64_Xword)sizeof(entry), /* sh_size */
952            0, /* sh_link */
953            0, /* sh_info */
954            1, /* sh_addralign */
955            0 /* sh_entsize */
956        },
957        { /* .rodata */
958            27, /* sh_name */
959            SHT_PROGBITS,
960            SHF_ALLOC, /* sh_flags */
961            0, /* sh_addr */
962            (Elf64_Off)(sizeof(header64)+sizeof(sectionHeaders64)+2*sizeof(Elf64_Sym)+40+sizeof(entry)), /* sh_offset */
963            0, /* sh_size */
964            0, /* sh_link */
965            0, /* sh_info */
966            16, /* sh_addralign */
967            0 /* sh_entsize */
968        }
969    };
970
971    /*
972     * 64-bit symbol table
973     * careful: different order of items compared with Elf32_sym!
974     */
975    static Elf64_Sym symbols64[2]={
976        { /* STN_UNDEF */
977            0
978        },
979        { /* data entry point */
980            1, /* st_name */
981            ELF64_ST_INFO(STB_GLOBAL, STT_OBJECT),
982            0, /* st_other */
983            4, /* st_shndx=index of related section table entry */
984            0, /* st_value */
985            0 /* st_size */
986        }
987    };
988
989#endif /* U_ELF64 */
990
991    /* entry[] have a leading NUL */
992    entryOffset=1;
993
994    /* in the common code, count entryLength from after the NUL */
995    entryLengthOffset=1;
996
997    newSuffix=".o";
998
999#elif U_PLATFORM_HAS_WIN32_API
1000    struct {
1001        IMAGE_FILE_HEADER fileHeader;
1002        IMAGE_SECTION_HEADER sections[2];
1003        char linkerOptions[100];
1004    } objHeader;
1005    IMAGE_SYMBOL symbols[1];
1006    struct {
1007        DWORD sizeofLongNames;
1008        char longNames[100];
1009    } symbolNames;
1010
1011    /*
1012     * entry sometimes have a leading '_'
1013     * overwritten if entryOffset==0 depending on the target platform
1014     * see check for cpu below
1015     */
1016    entry[0]='_';
1017
1018    newSuffix=".obj";
1019#else
1020#   error "Unknown platform for CAN_GENERATE_OBJECTS."
1021#endif
1022
1023    /* deal with options, files and the entry point name */
1024    getArchitecture(&cpu, &bits, &makeBigEndian, optMatchArch);
1025    printf("genccode: --match-arch cpu=%hu bits=%hu big-endian=%d\n", cpu, bits, makeBigEndian);
1026#if U_PLATFORM_HAS_WIN32_API
1027    if(cpu==IMAGE_FILE_MACHINE_I386) {
1028        entryOffset=1;
1029    }
1030#endif
1031
1032    in=T_FileStream_open(filename, "rb");
1033    if(in==NULL) {
1034        fprintf(stderr, "genccode: unable to open input file %s\n", filename);
1035        exit(U_FILE_ACCESS_ERROR);
1036    }
1037    size=T_FileStream_size(in);
1038
1039    getOutFilename(filename, destdir, buffer, entry+entryOffset, newSuffix, optFilename);
1040    if (outFilePath != NULL) {
1041        uprv_strcpy(outFilePath, buffer);
1042    }
1043
1044    if(optEntryPoint != NULL) {
1045        uprv_strcpy(entry+entryOffset, optEntryPoint);
1046        uprv_strcat(entry+entryOffset, "_dat");
1047    }
1048    /* turn dashes in the entry name into underscores */
1049    entryLength=(int32_t)uprv_strlen(entry+entryLengthOffset);
1050    for(i=0; i<entryLength; ++i) {
1051        if(entry[entryLengthOffset+i]=='-') {
1052            entry[entryLengthOffset+i]='_';
1053        }
1054    }
1055
1056    /* open the output file */
1057    out=T_FileStream_open(buffer, "wb");
1058    if(out==NULL) {
1059        fprintf(stderr, "genccode: unable to open output file %s\n", buffer);
1060        exit(U_FILE_ACCESS_ERROR);
1061    }
1062
1063#ifdef U_ELF
1064    if(bits==32) {
1065        header32.e_ident[EI_DATA]= makeBigEndian ? ELFDATA2MSB : ELFDATA2LSB;
1066        header32.e_machine=cpu;
1067
1068        /* 16-align .rodata in the .o file, just in case */
1069        paddingSize=sectionHeaders32[4].sh_offset & 0xf;
1070        if(paddingSize!=0) {
1071                paddingSize=0x10-paddingSize;
1072                sectionHeaders32[4].sh_offset+=paddingSize;
1073        }
1074
1075        sectionHeaders32[4].sh_size=(Elf32_Word)size;
1076
1077        symbols32[1].st_size=(Elf32_Word)size;
1078
1079        /* write .o headers */
1080        T_FileStream_write(out, &header32, (int32_t)sizeof(header32));
1081        T_FileStream_write(out, sectionHeaders32, (int32_t)sizeof(sectionHeaders32));
1082        T_FileStream_write(out, symbols32, (int32_t)sizeof(symbols32));
1083    } else /* bits==64 */ {
1084#ifdef U_ELF64
1085        header64.e_ident[EI_DATA]= makeBigEndian ? ELFDATA2MSB : ELFDATA2LSB;
1086        header64.e_machine=cpu;
1087
1088        /* 16-align .rodata in the .o file, just in case */
1089        paddingSize=sectionHeaders64[4].sh_offset & 0xf;
1090        if(paddingSize!=0) {
1091                paddingSize=0x10-paddingSize;
1092                sectionHeaders64[4].sh_offset+=paddingSize;
1093        }
1094
1095        sectionHeaders64[4].sh_size=(Elf64_Xword)size;
1096
1097        symbols64[1].st_size=(Elf64_Xword)size;
1098
1099        /* write .o headers */
1100        T_FileStream_write(out, &header64, (int32_t)sizeof(header64));
1101        T_FileStream_write(out, sectionHeaders64, (int32_t)sizeof(sectionHeaders64));
1102        T_FileStream_write(out, symbols64, (int32_t)sizeof(symbols64));
1103#endif
1104    }
1105
1106    T_FileStream_write(out, sectionStrings, (int32_t)sizeof(sectionStrings));
1107    T_FileStream_write(out, entry, (int32_t)sizeof(entry));
1108    if(paddingSize!=0) {
1109        T_FileStream_write(out, padding, paddingSize);
1110    }
1111#elif U_PLATFORM_HAS_WIN32_API
1112    /* populate the .obj headers */
1113    uprv_memset(&objHeader, 0, sizeof(objHeader));
1114    uprv_memset(&symbols, 0, sizeof(symbols));
1115    uprv_memset(&symbolNames, 0, sizeof(symbolNames));
1116
1117    /* write the linker export directive */
1118    uprv_strcpy(objHeader.linkerOptions, "-export:");
1119    length=8;
1120    uprv_strcpy(objHeader.linkerOptions+length, entry);
1121    length+=entryLength;
1122    uprv_strcpy(objHeader.linkerOptions+length, ",data ");
1123    length+=6;
1124
1125    /* set the file header */
1126    objHeader.fileHeader.Machine=cpu;
1127    objHeader.fileHeader.NumberOfSections=2;
1128    objHeader.fileHeader.TimeDateStamp=(DWORD)time(NULL);
1129    objHeader.fileHeader.PointerToSymbolTable=IMAGE_SIZEOF_FILE_HEADER+2*IMAGE_SIZEOF_SECTION_HEADER+length+size; /* start of symbol table */
1130    objHeader.fileHeader.NumberOfSymbols=1;
1131
1132    /* set the section for the linker options */
1133    uprv_strncpy((char *)objHeader.sections[0].Name, ".drectve", 8);
1134    objHeader.sections[0].SizeOfRawData=length;
1135    objHeader.sections[0].PointerToRawData=IMAGE_SIZEOF_FILE_HEADER+2*IMAGE_SIZEOF_SECTION_HEADER;
1136    objHeader.sections[0].Characteristics=IMAGE_SCN_LNK_INFO|IMAGE_SCN_LNK_REMOVE|IMAGE_SCN_ALIGN_1BYTES;
1137
1138    /* set the data section */
1139    uprv_strncpy((char *)objHeader.sections[1].Name, ".rdata", 6);
1140    objHeader.sections[1].SizeOfRawData=size;
1141    objHeader.sections[1].PointerToRawData=IMAGE_SIZEOF_FILE_HEADER+2*IMAGE_SIZEOF_SECTION_HEADER+length;
1142    objHeader.sections[1].Characteristics=IMAGE_SCN_CNT_INITIALIZED_DATA|IMAGE_SCN_ALIGN_16BYTES|IMAGE_SCN_MEM_READ;
1143
1144    /* set the symbol table */
1145    if(entryLength<=8) {
1146        uprv_strncpy((char *)symbols[0].N.ShortName, entry, entryLength);
1147        symbolNames.sizeofLongNames=4;
1148    } else {
1149        symbols[0].N.Name.Short=0;
1150        symbols[0].N.Name.Long=4;
1151        symbolNames.sizeofLongNames=4+entryLength+1;
1152        uprv_strcpy(symbolNames.longNames, entry);
1153    }
1154    symbols[0].SectionNumber=2;
1155    symbols[0].StorageClass=IMAGE_SYM_CLASS_EXTERNAL;
1156
1157    /* write the file header and the linker options section */
1158    T_FileStream_write(out, &objHeader, objHeader.sections[1].PointerToRawData);
1159#else
1160#   error "Unknown platform for CAN_GENERATE_OBJECTS."
1161#endif
1162
1163    /* copy the data file into section 2 */
1164    for(;;) {
1165        length=T_FileStream_read(in, buffer, sizeof(buffer));
1166        if(length==0) {
1167            break;
1168        }
1169        T_FileStream_write(out, buffer, (int32_t)length);
1170    }
1171
1172#if U_PLATFORM_HAS_WIN32_API
1173    /* write the symbol table */
1174    T_FileStream_write(out, symbols, IMAGE_SIZEOF_SYMBOL);
1175    T_FileStream_write(out, &symbolNames, symbolNames.sizeofLongNames);
1176#endif
1177
1178    if(T_FileStream_error(in)) {
1179        fprintf(stderr, "genccode: file read error while generating from file %s\n", filename);
1180        exit(U_FILE_ACCESS_ERROR);
1181    }
1182
1183    if(T_FileStream_error(out)) {
1184        fprintf(stderr, "genccode: file write error while generating from file %s\n", filename);
1185        exit(U_FILE_ACCESS_ERROR);
1186    }
1187
1188    T_FileStream_close(out);
1189    T_FileStream_close(in);
1190}
1191#endif
1192