1/******************************************************************************
2 *   Copyright (C) 2009-2013, International Business Machines
3 *   Corporation and others.  All Rights Reserved.
4 *******************************************************************************
5 */
6#include "unicode/utypes.h"
7
8#if U_PLATFORM_HAS_WIN32_API
9#   define VC_EXTRALEAN
10#   define WIN32_LEAN_AND_MEAN
11#   define NOUSER
12#   define NOSERVICE
13#   define NOIME
14#   define NOMCX
15#include <windows.h>
16#include <time.h>
17#   ifdef __GNUC__
18#       define WINDOWS_WITH_GNUC
19#   endif
20#endif
21
22#if U_PLATFORM_IS_LINUX_BASED && U_HAVE_ELF_H
23#   define U_ELF
24#endif
25
26#ifdef U_ELF
27#   include <elf.h>
28#   if defined(ELFCLASS64)
29#       define U_ELF64
30#   endif
31    /* Old elf.h headers may not have EM_X86_64, or have EM_X8664 instead. */
32#   ifndef EM_X86_64
33#       define EM_X86_64 62
34#   endif
35#   define ICU_ENTRY_OFFSET 0
36#endif
37
38#include <stdio.h>
39#include <stdlib.h>
40#include "unicode/putil.h"
41#include "cmemory.h"
42#include "cstring.h"
43#include "filestrm.h"
44#include "toolutil.h"
45#include "unicode/uclean.h"
46#include "uoptions.h"
47#include "pkg_genc.h"
48
/*
 * Sentinel column value: the write8()/write32()/write8str() helpers treat it
 * as "nothing has been written yet" and emit no separator before the value.
 */
#define MAX_COLUMN ((uint32_t)(0xFFFFFFFFU))

/* Hexadecimal literal styles used by write32(); stored in AssemblyType.hexType. */
#define HEX_0X 0 /*  0x1234 */
#define HEX_0H 1 /*  01234h */

/* prototypes --------------------------------------------------------------- */
/*
 * Builds the output file path in outFilename and the entry point name in
 * entryName from the input file name, destdir, newSuffix and optFilename.
 */
static void
getOutFilename(const char *inFilename, const char *destdir, char *outFilename, char *entryName, const char *newSuffix, const char *optFilename);

/* Writes one byte as a decimal number; returns the updated column counter. */
static uint32_t
write8(FileStream *out, uint8_t byte, uint32_t column);

/* Writes one 32-bit value for the selected assembler; returns the updated column counter. */
static uint32_t
write32(FileStream *out, uint32_t byte, uint32_t column);

#if U_PLATFORM == U_PF_OS400
/* Writes one byte as an escape sequence inside a string literal; returns the updated column counter. */
static uint32_t
write8str(FileStream *out, uint8_t byte, uint32_t column);
#endif
68/* -------------------------------------------------------------------------- */
69
70/*
71Creating Template Files for New Platforms
72
73Let the cc compiler help you get started.
74Compile this program
75    const unsigned int x[5] = {1, 2, 0xdeadbeef, 0xffffffff, 16};
76with the -S option to produce assembly output.
77
78For example, this will generate array.s:
79gcc -S array.c
80
81This will produce a .s file that may look like this:
82
83    .file   "array.c"
84    .version        "01.01"
85gcc2_compiled.:
86    .globl x
87    .section        .rodata
88    .align 4
89    .type    x,@object
90    .size    x,20
91x:
92    .long   1
93    .long   2
94    .long   -559038737
95    .long   -1
96    .long   16
97    .ident  "GCC: (GNU) 2.96 20000731 (Red Hat Linux 7.1 2.96-85)"
98
99which gives a starting point that will compile, and can be transformed
100to become the template, generally with some consulting of as docs and
101some experimentation.
102
103If you want ICU to automatically use this assembly, you should
104specify "GENCCODE_ASSEMBLY=-a name" in the specific config/mh-* file,
105where the name is the compiler or platform that you used in this
106assemblyHeader data structure.
107*/
/*
 * Table of supported assembler dialects.
 *
 * name      - value compared against the requested assembly type in
 *             checkAssemblyHeaderName().
 * header    - printf-style format written once at the top of the output;
 *             every %s receives the entry point (symbol) name.
 * beginLine - directive text written at the start of each line of data words.
 * footer    - printf-style format written after the data; every %s receives
 *             the entry point name (empty for most dialects).
 * hexType   - hexadecimal literal style used by write32().
 */
static const struct AssemblyType {
    const char *name;
    const char *header;
    const char *beginLine;
    const char *footer;
    int8_t      hexType; /* HEX_0X or HEX_0H */
} assemblyHeader[] = {
    // For gcc assemblers, the meaning of .align changes depending on the
    // hardware, so we use .balign 16 which always means 16 bytes.
    // https://sourceware.org/binutils/docs/as/Pseudo-Ops.html
    {"gcc",
        ".globl %s\n"
        "\t.section .note.GNU-stack,\"\",%%progbits\n"
        "\t.section .rodata\n"
        "\t.balign 16\n"
        /* The 3 lines below are added for Chrome. */
        "#ifdef U_HIDE_DATA_SYMBOL\n"
        "\t.hidden %s\n"
        "#endif\n"
        "\t.type %s,%%object\n"
        "%s:\n\n",

        ".long ","",HEX_0X
    },
    {"gcc-darwin",
        /*"\t.section __TEXT,__text,regular,pure_instructions\n"
        "\t.section __TEXT,__picsymbolstub1,symbol_stubs,pure_instructions,32\n"*/
        ".globl _%s\n"
        /* The 3 lines below are added for Chrome. */
        "#ifdef U_HIDE_DATA_SYMBOL\n"
        "\t.private_extern _%s\n"
        "#endif\n"
        "\t.data\n"
        "\t.const\n"
        "\t.balign 16\n"
        "_%s:\n\n",

        ".long ","",HEX_0X
    },
    {"gcc-cygwin",
        ".globl _%s\n"
        "\t.section .rodata\n"
        "\t.balign 16\n"
        "_%s:\n\n",

        ".long ","",HEX_0X
    },
    {"gcc-mingw64",
        ".globl %s\n"
        "\t.section .rodata\n"
        "\t.balign 16\n"
        "%s:\n\n",

        ".long ","",HEX_0X
    },
// 16 bytes alignment.
// http://docs.oracle.com/cd/E19641-01/802-1947/802-1947.pdf
    {"sun",
        "\t.section \".rodata\"\n"
        "\t.align   16\n"
        ".globl     %s\n"
        "%s:\n",

        ".word ","",HEX_0X
    },
// 16 bytes alignment for sun-x86.
// http://docs.oracle.com/cd/E19963-01/html/821-1608/eoiyg.html
    {"sun-x86",
        "Drodata.rodata:\n"
        "\t.type   Drodata.rodata,@object\n"
        "\t.size   Drodata.rodata,0\n"
        "\t.globl  %s\n"
        "\t.align  16\n"
        "%s:\n",

        ".4byte ","",HEX_0X
    },
// 1<<4 bit alignment for aix.
// http://pic.dhe.ibm.com/infocenter/aix/v6r1/index.jsp?topic=%2Fcom.ibm.aix.aixassem%2Fdoc%2Falangref%2Fidalangref_csect_pseudoop.htm
    {"xlc",
        ".globl %s{RO}\n"
        "\t.toc\n"
        "%s:\n"
        "\t.csect %s{RO}, 4\n",

        ".long ","",HEX_0X
    },
    {"aCC-ia64",
        "\t.file   \"%s.s\"\n"
        "\t.type   %s,@object\n"
        "\t.global %s\n"
        "\t.secalias .abe$0.rodata, \".rodata\"\n"
        "\t.section .abe$0.rodata = \"a\", \"progbits\"\n"
        "\t.align  16\n"
        "%s::\t",

        "data4 ","",HEX_0X
    },
    {"aCC-parisc",
        "\t.SPACE  $TEXT$\n"
        "\t.SUBSPA $LIT$\n"
        "%s\n"
        "\t.EXPORT %s\n"
        "\t.ALIGN  16\n",

        ".WORD ","",HEX_0X
    },
// align 16 bytes
//  http://msdn.microsoft.com/en-us/library/dwa9fwef.aspx
    /* masm is the only entry with a non-empty footer and the 01234h hex style. */
    { "masm",
      "\tTITLE %s\n"
      "; generated by genccode\n"
      ".386\n"
      ".model flat\n"
      "\tPUBLIC _%s\n"
      "ICUDATA_%s\tSEGMENT READONLY PARA PUBLIC FLAT 'DATA'\n"
      "\tALIGN 16\n"
      "_%s\tLABEL DWORD\n",
      "\tDWORD ","\nICUDATA_%s\tENDS\n\tEND\n",HEX_0H
    }
};
229
/* Index into assemblyHeader[] selected by checkAssemblyHeaderName(); -1 while no entry is selected. */
static int32_t assemblyHeaderIndex = -1;
/* Hex literal style (HEX_0X or HEX_0H) read by write32(); copied from the selected assemblyHeader entry. */
static int32_t hexType = HEX_0X;
232
233U_CAPI UBool U_EXPORT2
234checkAssemblyHeaderName(const char* optAssembly) {
235    int32_t idx;
236    assemblyHeaderIndex = -1;
237    for (idx = 0; idx < (int32_t)(sizeof(assemblyHeader)/sizeof(assemblyHeader[0])); idx++) {
238        if (uprv_strcmp(optAssembly, assemblyHeader[idx].name) == 0) {
239            assemblyHeaderIndex = idx;
240            hexType = assemblyHeader[idx].hexType; /* set the hex type */
241            return TRUE;
242        }
243    }
244
245    return FALSE;
246}
247
248
249U_CAPI void U_EXPORT2
250printAssemblyHeadersToStdErr(void) {
251    int32_t idx;
252    fprintf(stderr, "%s", assemblyHeader[0].name);
253    for (idx = 1; idx < (int32_t)(sizeof(assemblyHeader)/sizeof(assemblyHeader[0])); idx++) {
254        fprintf(stderr, ", %s", assemblyHeader[idx].name);
255    }
256    fprintf(stderr,
257        ")\n");
258}
259
260U_CAPI void U_EXPORT2
261writeAssemblyCode(const char *filename, const char *destdir, const char *optEntryPoint, const char *optFilename, char *outFilePath) {
262    uint32_t column = MAX_COLUMN;
263    char entry[64];
264    uint32_t buffer[1024];
265    char *bufferStr = (char *)buffer;
266    FileStream *in, *out;
267    size_t i, length;
268
269    in=T_FileStream_open(filename, "rb");
270    if(in==NULL) {
271        fprintf(stderr, "genccode: unable to open input file %s\n", filename);
272        exit(U_FILE_ACCESS_ERROR);
273    }
274
275    getOutFilename(filename, destdir, bufferStr, entry, ".S", optFilename);
276    out=T_FileStream_open(bufferStr, "w");
277    if(out==NULL) {
278        fprintf(stderr, "genccode: unable to open output file %s\n", bufferStr);
279        exit(U_FILE_ACCESS_ERROR);
280    }
281
282    if (outFilePath != NULL) {
283        uprv_strcpy(outFilePath, bufferStr);
284    }
285
286#ifdef WINDOWS_WITH_GNUC
287    /* Need to fix the file seperator character when using MinGW. */
288    swapFileSepChar(outFilePath, U_FILE_SEP_CHAR, '/');
289#endif
290
291    if(optEntryPoint != NULL) {
292        uprv_strcpy(entry, optEntryPoint);
293        uprv_strcat(entry, "_dat");
294    }
295
296    /* turn dashes or dots in the entry name into underscores */
297    length=uprv_strlen(entry);
298    for(i=0; i<length; ++i) {
299        if(entry[i]=='-' || entry[i]=='.') {
300            entry[i]='_';
301        }
302    }
303
304    sprintf(bufferStr, assemblyHeader[assemblyHeaderIndex].header,
305        entry, entry, entry, entry,
306        entry, entry, entry, entry);
307    T_FileStream_writeLine(out, bufferStr);
308    T_FileStream_writeLine(out, assemblyHeader[assemblyHeaderIndex].beginLine);
309
310    for(;;) {
311        length=T_FileStream_read(in, buffer, sizeof(buffer));
312        if(length==0) {
313            break;
314        }
315        if (length != sizeof(buffer)) {
316            /* pad with extra 0's when at the end of the file */
317            for(i=0; i < (length % sizeof(uint32_t)); ++i) {
318                buffer[length+i] = 0;
319            }
320        }
321        for(i=0; i<(length/sizeof(buffer[0])); i++) {
322            column = write32(out, buffer[i], column);
323        }
324    }
325
326    T_FileStream_writeLine(out, "\n");
327
328    sprintf(bufferStr, assemblyHeader[assemblyHeaderIndex].footer,
329        entry, entry, entry, entry,
330        entry, entry, entry, entry);
331    T_FileStream_writeLine(out, bufferStr);
332
333    if(T_FileStream_error(in)) {
334        fprintf(stderr, "genccode: file read error while generating from file %s\n", filename);
335        exit(U_FILE_ACCESS_ERROR);
336    }
337
338    if(T_FileStream_error(out)) {
339        fprintf(stderr, "genccode: file write error while generating from file %s\n", filename);
340        exit(U_FILE_ACCESS_ERROR);
341    }
342
343    T_FileStream_close(out);
344    T_FileStream_close(in);
345}
346
347U_CAPI void U_EXPORT2
348writeCCode(const char *filename, const char *destdir, const char *optName, const char *optFilename, char *outFilePath) {
349    uint32_t column = MAX_COLUMN;
350    char buffer[4096], entry[64];
351    FileStream *in, *out;
352    size_t i, length;
353
354    in=T_FileStream_open(filename, "rb");
355    if(in==NULL) {
356        fprintf(stderr, "genccode: unable to open input file %s\n", filename);
357        exit(U_FILE_ACCESS_ERROR);
358    }
359
360    if(optName != NULL) { /* prepend  'icudt28_' */
361      strcpy(entry, optName);
362      strcat(entry, "_");
363    } else {
364      entry[0] = 0;
365    }
366
367    getOutFilename(filename, destdir, buffer, entry+uprv_strlen(entry), ".c", optFilename);
368    if (outFilePath != NULL) {
369        uprv_strcpy(outFilePath, buffer);
370    }
371    out=T_FileStream_open(buffer, "w");
372    if(out==NULL) {
373        fprintf(stderr, "genccode: unable to open output file %s\n", buffer);
374        exit(U_FILE_ACCESS_ERROR);
375    }
376
377    /* turn dashes or dots in the entry name into underscores */
378    length=uprv_strlen(entry);
379    for(i=0; i<length; ++i) {
380        if(entry[i]=='-' || entry[i]=='.') {
381            entry[i]='_';
382        }
383    }
384
385#if U_PLATFORM == U_PF_OS400
386    /*
387    TODO: Fix this once the compiler implements this feature. Keep in sync with udatamem.c
388
389    This is here because this platform can't currently put
390    const data into the read-only pages of an object or
391    shared library (service program). Only strings are allowed in read-only
392    pages, so we use char * strings to store the data.
393
394    In order to prevent the beginning of the data from ever matching the
395    magic numbers we must still use the initial double.
396    [grhoten 4/24/2003]
397    */
398    sprintf(buffer,
399        "#ifndef IN_GENERATED_CCODE\n"
400        "#define IN_GENERATED_CCODE\n"
401        "#define U_DISABLE_RENAMING 1\n"
402        "#include \"unicode/umachine.h\"\n"
403        "#endif\n"
404        "U_CDECL_BEGIN\n"
405        "const struct {\n"
406        "    double bogus;\n"
407        "    const char *bytes; \n"
408        "} %s={ 0.0, \n",
409        entry);
410    T_FileStream_writeLine(out, buffer);
411
412    for(;;) {
413        length=T_FileStream_read(in, buffer, sizeof(buffer));
414        if(length==0) {
415            break;
416        }
417        for(i=0; i<length; ++i) {
418            column = write8str(out, (uint8_t)buffer[i], column);
419        }
420    }
421
422    T_FileStream_writeLine(out, "\"\n};\nU_CDECL_END\n");
423#else
424    /* Function renaming shouldn't be done in data */
425    sprintf(buffer,
426        "#ifndef IN_GENERATED_CCODE\n"
427        "#define IN_GENERATED_CCODE\n"
428        "#define U_DISABLE_RENAMING 1\n"
429        "#include \"unicode/umachine.h\"\n"
430        "#endif\n"
431        "U_CDECL_BEGIN\n"
432        "const struct {\n"
433        "    double bogus;\n"
434        "    uint8_t bytes[%ld]; \n"
435        "} %s={ 0.0, {\n",
436        (long)T_FileStream_size(in), entry);
437    T_FileStream_writeLine(out, buffer);
438
439    for(;;) {
440        length=T_FileStream_read(in, buffer, sizeof(buffer));
441        if(length==0) {
442            break;
443        }
444        for(i=0; i<length; ++i) {
445            column = write8(out, (uint8_t)buffer[i], column);
446        }
447    }
448
449    T_FileStream_writeLine(out, "\n}\n};\nU_CDECL_END\n");
450#endif
451
452    if(T_FileStream_error(in)) {
453        fprintf(stderr, "genccode: file read error while generating from file %s\n", filename);
454        exit(U_FILE_ACCESS_ERROR);
455    }
456
457    if(T_FileStream_error(out)) {
458        fprintf(stderr, "genccode: file write error while generating from file %s\n", filename);
459        exit(U_FILE_ACCESS_ERROR);
460    }
461
462    T_FileStream_close(out);
463    T_FileStream_close(in);
464}
465
466static uint32_t
467write32(FileStream *out, uint32_t bitField, uint32_t column) {
468    int32_t i;
469    char bitFieldStr[64]; /* This is more bits than needed for a 32-bit number */
470    char *s = bitFieldStr;
471    uint8_t *ptrIdx = (uint8_t *)&bitField;
472    static const char hexToStr[16] = {
473        '0','1','2','3',
474        '4','5','6','7',
475        '8','9','A','B',
476        'C','D','E','F'
477    };
478
479    /* write the value, possibly with comma and newline */
480    if(column==MAX_COLUMN) {
481        /* first byte */
482        column=1;
483    } else if(column<32) {
484        *(s++)=',';
485        ++column;
486    } else {
487        *(s++)='\n';
488        uprv_strcpy(s, assemblyHeader[assemblyHeaderIndex].beginLine);
489        s+=uprv_strlen(s);
490        column=1;
491    }
492
493    if (bitField < 10) {
494        /* It's a small number. Don't waste the space for 0x */
495        *(s++)=hexToStr[bitField];
496    }
497    else {
498        int seenNonZero = 0; /* This is used to remove leading zeros */
499
500        if(hexType==HEX_0X) {
501         *(s++)='0';
502         *(s++)='x';
503        } else if(hexType==HEX_0H) {
504         *(s++)='0';
505        }
506
507        /* This creates a 32-bit field */
508#if U_IS_BIG_ENDIAN
509        for (i = 0; i < sizeof(uint32_t); i++)
510#else
511        for (i = sizeof(uint32_t)-1; i >= 0 ; i--)
512#endif
513        {
514            uint8_t value = ptrIdx[i];
515            if (value || seenNonZero) {
516                *(s++)=hexToStr[value>>4];
517                *(s++)=hexToStr[value&0xF];
518                seenNonZero = 1;
519            }
520        }
521        if(hexType==HEX_0H) {
522         *(s++)='h';
523        }
524    }
525
526    *(s++)=0;
527    T_FileStream_writeLine(out, bitFieldStr);
528    return column;
529}
530
531static uint32_t
532write8(FileStream *out, uint8_t byte, uint32_t column) {
533    char s[4];
534    int i=0;
535
536    /* convert the byte value to a string */
537    if(byte>=100) {
538        s[i++]=(char)('0'+byte/100);
539        byte%=100;
540    }
541    if(i>0 || byte>=10) {
542        s[i++]=(char)('0'+byte/10);
543        byte%=10;
544    }
545    s[i++]=(char)('0'+byte);
546    s[i]=0;
547
548    /* write the value, possibly with comma and newline */
549    if(column==MAX_COLUMN) {
550        /* first byte */
551        column=1;
552    } else if(column<16) {
553        T_FileStream_writeLine(out, ",");
554        ++column;
555    } else {
556        T_FileStream_writeLine(out, ",\n");
557        column=1;
558    }
559    T_FileStream_writeLine(out, s);
560    return column;
561}
562
563#if U_PLATFORM == U_PF_OS400
564static uint32_t
565write8str(FileStream *out, uint8_t byte, uint32_t column) {
566    char s[8];
567
568    if (byte > 7)
569        sprintf(s, "\\x%X", byte);
570    else
571        sprintf(s, "\\%X", byte);
572
573    /* write the value, possibly with comma and newline */
574    if(column==MAX_COLUMN) {
575        /* first byte */
576        column=1;
577        T_FileStream_writeLine(out, "\"");
578    } else if(column<24) {
579        ++column;
580    } else {
581        T_FileStream_writeLine(out, "\"\n\"");
582        column=1;
583    }
584    T_FileStream_writeLine(out, s);
585    return column;
586}
587#endif
588
589static void
590getOutFilename(const char *inFilename, const char *destdir, char *outFilename, char *entryName, const char *newSuffix, const char *optFilename) {
591    const char *basename=findBasename(inFilename), *suffix=uprv_strrchr(basename, '.');
592
593    /* copy path */
594    if(destdir!=NULL && *destdir!=0) {
595        do {
596            *outFilename++=*destdir++;
597        } while(*destdir!=0);
598        if(*(outFilename-1)!=U_FILE_SEP_CHAR) {
599            *outFilename++=U_FILE_SEP_CHAR;
600        }
601        inFilename=basename;
602    } else {
603        while(inFilename<basename) {
604            *outFilename++=*inFilename++;
605        }
606    }
607
608    if(suffix==NULL) {
609        /* the filename does not have a suffix */
610        uprv_strcpy(entryName, inFilename);
611        if(optFilename != NULL) {
612          uprv_strcpy(outFilename, optFilename);
613        } else {
614          uprv_strcpy(outFilename, inFilename);
615        }
616        uprv_strcat(outFilename, newSuffix);
617    } else {
618        char *saveOutFilename = outFilename;
619        /* copy basename */
620        while(inFilename<suffix) {
621            if(*inFilename=='-') {
622                /* iSeries cannot have '-' in the .o objects. */
623                *outFilename++=*entryName++='_';
624                inFilename++;
625            }
626            else {
627                *outFilename++=*entryName++=*inFilename++;
628            }
629        }
630
631        /* replace '.' by '_' */
632        *outFilename++=*entryName++='_';
633        ++inFilename;
634
635        /* copy suffix */
636        while(*inFilename!=0) {
637            *outFilename++=*entryName++=*inFilename++;
638        }
639
640        *entryName=0;
641
642        if(optFilename != NULL) {
643            uprv_strcpy(saveOutFilename, optFilename);
644            uprv_strcat(saveOutFilename, newSuffix);
645        } else {
646            /* add ".c" */
647            uprv_strcpy(outFilename, newSuffix);
648        }
649    }
650}
651
652#ifdef CAN_GENERATE_OBJECTS
653static void
654getArchitecture(uint16_t *pCPU, uint16_t *pBits, UBool *pIsBigEndian, const char *optMatchArch) {
655    union {
656        char        bytes[2048];
657#ifdef U_ELF
658        Elf32_Ehdr  header32;
659        /* Elf32_Ehdr and ELF64_Ehdr are identical for the necessary fields. */
660#elif U_PLATFORM_HAS_WIN32_API
661        IMAGE_FILE_HEADER header;
662#endif
663    } buffer;
664
665    const char *filename;
666    FileStream *in;
667    int32_t length;
668
669#ifdef U_ELF
670
671#elif U_PLATFORM_HAS_WIN32_API
672    const IMAGE_FILE_HEADER *pHeader;
673#else
674#   error "Unknown platform for CAN_GENERATE_OBJECTS."
675#endif
676
677    if(optMatchArch != NULL) {
678        filename=optMatchArch;
679    } else {
680        /* set defaults */
681#ifdef U_ELF
682        /* set EM_386 because elf.h does not provide better defaults */
683        *pCPU=EM_386;
684        *pBits=32;
685        *pIsBigEndian=(UBool)(U_IS_BIG_ENDIAN ? ELFDATA2MSB : ELFDATA2LSB);
686#elif U_PLATFORM_HAS_WIN32_API
687/* _M_IA64 should be defined in windows.h */
688#   if defined(_M_IA64)
689        *pCPU=IMAGE_FILE_MACHINE_IA64;
690#   elif defined(_M_AMD64)
691        *pCPU=IMAGE_FILE_MACHINE_AMD64;
692#   else
693        *pCPU=IMAGE_FILE_MACHINE_I386;
694#   endif
695        *pBits= *pCPU==IMAGE_FILE_MACHINE_I386 ? 32 : 64;
696        *pIsBigEndian=FALSE;
697#else
698#   error "Unknown platform for CAN_GENERATE_OBJECTS."
699#endif
700        return;
701    }
702
703    in=T_FileStream_open(filename, "rb");
704    if(in==NULL) {
705        fprintf(stderr, "genccode: unable to open match-arch file %s\n", filename);
706        exit(U_FILE_ACCESS_ERROR);
707    }
708    length=T_FileStream_read(in, buffer.bytes, sizeof(buffer.bytes));
709
710#ifdef U_ELF
711    if(length<sizeof(Elf32_Ehdr)) {
712        fprintf(stderr, "genccode: match-arch file %s is too short\n", filename);
713        exit(U_UNSUPPORTED_ERROR);
714    }
715    if(
716        buffer.header32.e_ident[0]!=ELFMAG0 ||
717        buffer.header32.e_ident[1]!=ELFMAG1 ||
718        buffer.header32.e_ident[2]!=ELFMAG2 ||
719        buffer.header32.e_ident[3]!=ELFMAG3 ||
720        buffer.header32.e_ident[EI_CLASS]<ELFCLASS32 || buffer.header32.e_ident[EI_CLASS]>ELFCLASS64
721    ) {
722        fprintf(stderr, "genccode: match-arch file %s is not an ELF object file, or not supported\n", filename);
723        exit(U_UNSUPPORTED_ERROR);
724    }
725
726    *pBits= buffer.header32.e_ident[EI_CLASS]==ELFCLASS32 ? 32 : 64; /* only 32 or 64: see check above */
727#ifdef U_ELF64
728    if(*pBits!=32 && *pBits!=64) {
729        fprintf(stderr, "genccode: currently only supports 32-bit and 64-bit ELF format\n");
730        exit(U_UNSUPPORTED_ERROR);
731    }
732#else
733    if(*pBits!=32) {
734        fprintf(stderr, "genccode: built with elf.h missing 64-bit definitions\n");
735        exit(U_UNSUPPORTED_ERROR);
736    }
737#endif
738
739    *pIsBigEndian=(UBool)(buffer.header32.e_ident[EI_DATA]==ELFDATA2MSB);
740    if(*pIsBigEndian!=U_IS_BIG_ENDIAN) {
741        fprintf(stderr, "genccode: currently only same-endianness ELF formats are supported\n");
742        exit(U_UNSUPPORTED_ERROR);
743    }
744    /* TODO: Support byte swapping */
745
746    *pCPU=buffer.header32.e_machine;
747#elif U_PLATFORM_HAS_WIN32_API
748    if(length<sizeof(IMAGE_FILE_HEADER)) {
749        fprintf(stderr, "genccode: match-arch file %s is too short\n", filename);
750        exit(U_UNSUPPORTED_ERROR);
751    }
752    /* TODO: Use buffer.header.  Keep aliasing legal.  */
753    pHeader=(const IMAGE_FILE_HEADER *)buffer.bytes;
754    *pCPU=pHeader->Machine;
755    /*
756     * The number of bits is implicit with the Machine value.
757     * *pBits is ignored in the calling code, so this need not be precise.
758     */
759    *pBits= *pCPU==IMAGE_FILE_MACHINE_I386 ? 32 : 64;
760    /* Windows always runs on little-endian CPUs. */
761    *pIsBigEndian=FALSE;
762#else
763#   error "Unknown platform for CAN_GENERATE_OBJECTS."
764#endif
765
766    T_FileStream_close(in);
767}
768
769U_CAPI void U_EXPORT2
770writeObjectCode(const char *filename, const char *destdir, const char *optEntryPoint, const char *optMatchArch, const char *optFilename, char *outFilePath) {
771    /* common variables */
772    char buffer[4096], entry[40]={ 0 };
773    FileStream *in, *out;
774    const char *newSuffix;
775    int32_t i, entryLength, length, size, entryOffset=0, entryLengthOffset=0;
776
777    uint16_t cpu, bits;
778    UBool makeBigEndian;
779
780    /* platform-specific variables and initialization code */
781#ifdef U_ELF
782    /* 32-bit Elf file header */
783    static Elf32_Ehdr header32={
784        {
785            /* e_ident[] */
786            ELFMAG0, ELFMAG1, ELFMAG2, ELFMAG3,
787            ELFCLASS32,
788            U_IS_BIG_ENDIAN ? ELFDATA2MSB : ELFDATA2LSB,
789            EV_CURRENT /* EI_VERSION */
790        },
791        ET_REL,
792        EM_386,
793        EV_CURRENT, /* e_version */
794        0, /* e_entry */
795        0, /* e_phoff */
796        (Elf32_Off)sizeof(Elf32_Ehdr), /* e_shoff */
797        0, /* e_flags */
798        (Elf32_Half)sizeof(Elf32_Ehdr), /* eh_size */
799        0, /* e_phentsize */
800        0, /* e_phnum */
801        (Elf32_Half)sizeof(Elf32_Shdr), /* e_shentsize */
802        5, /* e_shnum */
803        2 /* e_shstrndx */
804    };
805
806    /* 32-bit Elf section header table */
807    static Elf32_Shdr sectionHeaders32[5]={
808        { /* SHN_UNDEF */
809            0
810        },
811        { /* .symtab */
812            1, /* sh_name */
813            SHT_SYMTAB,
814            0, /* sh_flags */
815            0, /* sh_addr */
816            (Elf32_Off)(sizeof(header32)+sizeof(sectionHeaders32)), /* sh_offset */
817            (Elf32_Word)(2*sizeof(Elf32_Sym)), /* sh_size */
818            3, /* sh_link=sect hdr index of .strtab */
819            1, /* sh_info=One greater than the symbol table index of the last
820                * local symbol (with STB_LOCAL). */
821            4, /* sh_addralign */
822            (Elf32_Word)(sizeof(Elf32_Sym)) /* sh_entsize */
823        },
824        { /* .shstrtab */
825            9, /* sh_name */
826            SHT_STRTAB,
827            0, /* sh_flags */
828            0, /* sh_addr */
829            (Elf32_Off)(sizeof(header32)+sizeof(sectionHeaders32)+2*sizeof(Elf32_Sym)), /* sh_offset */
830            40, /* sh_size */
831            0, /* sh_link */
832            0, /* sh_info */
833            1, /* sh_addralign */
834            0 /* sh_entsize */
835        },
836        { /* .strtab */
837            19, /* sh_name */
838            SHT_STRTAB,
839            0, /* sh_flags */
840            0, /* sh_addr */
841            (Elf32_Off)(sizeof(header32)+sizeof(sectionHeaders32)+2*sizeof(Elf32_Sym)+40), /* sh_offset */
842            (Elf32_Word)sizeof(entry), /* sh_size */
843            0, /* sh_link */
844            0, /* sh_info */
845            1, /* sh_addralign */
846            0 /* sh_entsize */
847        },
848        { /* .rodata */
849            27, /* sh_name */
850            SHT_PROGBITS,
851            SHF_ALLOC, /* sh_flags */
852            0, /* sh_addr */
853            (Elf32_Off)(sizeof(header32)+sizeof(sectionHeaders32)+2*sizeof(Elf32_Sym)+40+sizeof(entry)), /* sh_offset */
854            0, /* sh_size */
855            0, /* sh_link */
856            0, /* sh_info */
857            16, /* sh_addralign */
858            0 /* sh_entsize */
859        }
860    };
861
862    /* symbol table */
863    static Elf32_Sym symbols32[2]={
864        { /* STN_UNDEF */
865            0
866        },
867        { /* data entry point */
868            1, /* st_name */
869            0, /* st_value */
870            0, /* st_size */
871            ELF64_ST_INFO(STB_GLOBAL, STT_OBJECT),
872            0, /* st_other */
873            4 /* st_shndx=index of related section table entry */
874        }
875    };
876
877    /* section header string table, with decimal string offsets */
878    static const char sectionStrings[40]=
879        /*  0 */ "\0"
880        /*  1 */ ".symtab\0"
881        /*  9 */ ".shstrtab\0"
882        /* 19 */ ".strtab\0"
883        /* 27 */ ".rodata\0"
884        /* 35 */ "\0\0\0\0"; /* contains terminating NUL */
885        /* 40: padded to multiple of 8 bytes */
886
887    /*
888     * Use entry[] for the string table which will contain only the
889     * entry point name.
890     * entry[0] must be 0 (NUL)
891     * The entry point name can be up to 38 characters long (sizeof(entry)-2).
892     */
893
894    /* 16-align .rodata in the .o file, just in case */
895    static const char padding[16]={ 0 };
896    int32_t paddingSize;
897
898#ifdef U_ELF64
899    /* 64-bit Elf file header */
900    static Elf64_Ehdr header64={
901        {
902            /* e_ident[] */
903            ELFMAG0, ELFMAG1, ELFMAG2, ELFMAG3,
904            ELFCLASS64,
905            U_IS_BIG_ENDIAN ? ELFDATA2MSB : ELFDATA2LSB,
906            EV_CURRENT /* EI_VERSION */
907        },
908        ET_REL,
909        EM_X86_64,
910        EV_CURRENT, /* e_version */
911        0, /* e_entry */
912        0, /* e_phoff */
913        (Elf64_Off)sizeof(Elf64_Ehdr), /* e_shoff */
914        0, /* e_flags */
915        (Elf64_Half)sizeof(Elf64_Ehdr), /* eh_size */
916        0, /* e_phentsize */
917        0, /* e_phnum */
918        (Elf64_Half)sizeof(Elf64_Shdr), /* e_shentsize */
919        5, /* e_shnum */
920        2 /* e_shstrndx */
921    };
922
923    /* 64-bit Elf section header table */
924    static Elf64_Shdr sectionHeaders64[5]={
925        { /* SHN_UNDEF */
926            0
927        },
928        { /* .symtab */
929            1, /* sh_name */
930            SHT_SYMTAB,
931            0, /* sh_flags */
932            0, /* sh_addr */
933            (Elf64_Off)(sizeof(header64)+sizeof(sectionHeaders64)), /* sh_offset */
934            (Elf64_Xword)(2*sizeof(Elf64_Sym)), /* sh_size */
935            3, /* sh_link=sect hdr index of .strtab */
936            1, /* sh_info=One greater than the symbol table index of the last
937                * local symbol (with STB_LOCAL). */
938            4, /* sh_addralign */
939            (Elf64_Xword)(sizeof(Elf64_Sym)) /* sh_entsize */
940        },
941        { /* .shstrtab */
942            9, /* sh_name */
943            SHT_STRTAB,
944            0, /* sh_flags */
945            0, /* sh_addr */
946            (Elf64_Off)(sizeof(header64)+sizeof(sectionHeaders64)+2*sizeof(Elf64_Sym)), /* sh_offset */
947            40, /* sh_size */
948            0, /* sh_link */
949            0, /* sh_info */
950            1, /* sh_addralign */
951            0 /* sh_entsize */
952        },
953        { /* .strtab */
954            19, /* sh_name */
955            SHT_STRTAB,
956            0, /* sh_flags */
957            0, /* sh_addr */
958            (Elf64_Off)(sizeof(header64)+sizeof(sectionHeaders64)+2*sizeof(Elf64_Sym)+40), /* sh_offset */
959            (Elf64_Xword)sizeof(entry), /* sh_size */
960            0, /* sh_link */
961            0, /* sh_info */
962            1, /* sh_addralign */
963            0 /* sh_entsize */
964        },
965        { /* .rodata */
966            27, /* sh_name */
967            SHT_PROGBITS,
968            SHF_ALLOC, /* sh_flags */
969            0, /* sh_addr */
970            (Elf64_Off)(sizeof(header64)+sizeof(sectionHeaders64)+2*sizeof(Elf64_Sym)+40+sizeof(entry)), /* sh_offset */
971            0, /* sh_size */
972            0, /* sh_link */
973            0, /* sh_info */
974            16, /* sh_addralign */
975            0 /* sh_entsize */
976        }
977    };
978
979    /*
980     * 64-bit symbol table
981     * careful: different order of items compared with Elf32_sym!
982     */
983    static Elf64_Sym symbols64[2]={
984        { /* STN_UNDEF */
985            0
986        },
987        { /* data entry point */
988            1, /* st_name */
989            ELF64_ST_INFO(STB_GLOBAL, STT_OBJECT),
990            0, /* st_other */
991            4, /* st_shndx=index of related section table entry */
992            0, /* st_value */
993            0 /* st_size */
994        }
995    };
996
997#endif /* U_ELF64 */
998
999    /* entry[] has a leading NUL */
1000    entryOffset=1;
1001
1002    /* in the common code, count entryLength from after the NUL */
1003    entryLengthOffset=1;
1004
1005    newSuffix=".o";
1006
1007#elif U_PLATFORM_HAS_WIN32_API
1008    struct {
1009        IMAGE_FILE_HEADER fileHeader;
1010        IMAGE_SECTION_HEADER sections[2];
1011        char linkerOptions[100];
1012    } objHeader;
1013    IMAGE_SYMBOL symbols[1];
1014    struct {
1015        DWORD sizeofLongNames;
1016        char longNames[100];
1017    } symbolNames;
1018
1019    /*
1020     * entry sometimes has a leading '_'
1021     * overwritten if entryOffset==0 depending on the target platform
1022     * see check for cpu below
1023     */
1024    entry[0]='_';
1025
1026    newSuffix=".obj";
1027#else
1028#   error "Unknown platform for CAN_GENERATE_OBJECTS."
1029#endif
1030
1031    /* deal with options, files and the entry point name */
1032    getArchitecture(&cpu, &bits, &makeBigEndian, optMatchArch);
1033    printf("genccode: --match-arch cpu=%hu bits=%hu big-endian=%d\n", cpu, bits, makeBigEndian);
1034#if U_PLATFORM_HAS_WIN32_API
1035    if(cpu==IMAGE_FILE_MACHINE_I386) {
1036        entryOffset=1;
1037    }
1038#endif
1039
1040    in=T_FileStream_open(filename, "rb");
1041    if(in==NULL) {
1042        fprintf(stderr, "genccode: unable to open input file %s\n", filename);
1043        exit(U_FILE_ACCESS_ERROR);
1044    }
1045    size=T_FileStream_size(in);
1046
1047    getOutFilename(filename, destdir, buffer, entry+entryOffset, newSuffix, optFilename);
1048    if (outFilePath != NULL) {
1049        uprv_strcpy(outFilePath, buffer);
1050    }
1051
1052    if(optEntryPoint != NULL) {
1053        uprv_strcpy(entry+entryOffset, optEntryPoint);
1054        uprv_strcat(entry+entryOffset, "_dat");
1055    }
1056    /* turn dashes in the entry name into underscores */
1057    entryLength=(int32_t)uprv_strlen(entry+entryLengthOffset);
1058    for(i=0; i<entryLength; ++i) {
1059        if(entry[entryLengthOffset+i]=='-') {
1060            entry[entryLengthOffset+i]='_';
1061        }
1062    }
1063
1064    /* open the output file */
1065    out=T_FileStream_open(buffer, "wb");
1066    if(out==NULL) {
1067        fprintf(stderr, "genccode: unable to open output file %s\n", buffer);
1068        exit(U_FILE_ACCESS_ERROR);
1069    }
1070
1071#ifdef U_ELF
1072    if(bits==32) {
1073        header32.e_ident[EI_DATA]= makeBigEndian ? ELFDATA2MSB : ELFDATA2LSB;
1074        header32.e_machine=cpu;
1075
1076        /* 16-align .rodata in the .o file, just in case */
1077        paddingSize=sectionHeaders32[4].sh_offset & 0xf;
1078        if(paddingSize!=0) {
1079                paddingSize=0x10-paddingSize;
1080                sectionHeaders32[4].sh_offset+=paddingSize;
1081        }
1082
1083        sectionHeaders32[4].sh_size=(Elf32_Word)size;
1084
1085        symbols32[1].st_size=(Elf32_Word)size;
1086
1087        /* write .o headers */
1088        T_FileStream_write(out, &header32, (int32_t)sizeof(header32));
1089        T_FileStream_write(out, sectionHeaders32, (int32_t)sizeof(sectionHeaders32));
1090        T_FileStream_write(out, symbols32, (int32_t)sizeof(symbols32));
1091    } else /* bits==64 */ {
1092#ifdef U_ELF64
1093        header64.e_ident[EI_DATA]= makeBigEndian ? ELFDATA2MSB : ELFDATA2LSB;
1094        header64.e_machine=cpu;
1095
1096        /* 16-align .rodata in the .o file, just in case */
1097        paddingSize=sectionHeaders64[4].sh_offset & 0xf;
1098        if(paddingSize!=0) {
1099                paddingSize=0x10-paddingSize;
1100                sectionHeaders64[4].sh_offset+=paddingSize;
1101        }
1102
1103        sectionHeaders64[4].sh_size=(Elf64_Xword)size;
1104
1105        symbols64[1].st_size=(Elf64_Xword)size;
1106
1107        /* write .o headers */
1108        T_FileStream_write(out, &header64, (int32_t)sizeof(header64));
1109        T_FileStream_write(out, sectionHeaders64, (int32_t)sizeof(sectionHeaders64));
1110        T_FileStream_write(out, symbols64, (int32_t)sizeof(symbols64));
1111#endif
1112    }
1113
1114    T_FileStream_write(out, sectionStrings, (int32_t)sizeof(sectionStrings));
1115    T_FileStream_write(out, entry, (int32_t)sizeof(entry));
1116    if(paddingSize!=0) {
1117        T_FileStream_write(out, padding, paddingSize);
1118    }
1119#elif U_PLATFORM_HAS_WIN32_API
1120    /* populate the .obj headers */
1121    uprv_memset(&objHeader, 0, sizeof(objHeader));
1122    uprv_memset(&symbols, 0, sizeof(symbols));
1123    uprv_memset(&symbolNames, 0, sizeof(symbolNames));
1124
1125    /* write the linker export directive */
1126    uprv_strcpy(objHeader.linkerOptions, "-export:");
1127    length=8;
1128    uprv_strcpy(objHeader.linkerOptions+length, entry);
1129    length+=entryLength;
1130    uprv_strcpy(objHeader.linkerOptions+length, ",data ");
1131    length+=6;
1132
1133    /* set the file header */
1134    objHeader.fileHeader.Machine=cpu;
1135    objHeader.fileHeader.NumberOfSections=2;
1136    objHeader.fileHeader.TimeDateStamp=(DWORD)time(NULL);
1137    objHeader.fileHeader.PointerToSymbolTable=IMAGE_SIZEOF_FILE_HEADER+2*IMAGE_SIZEOF_SECTION_HEADER+length+size; /* start of symbol table */
1138    objHeader.fileHeader.NumberOfSymbols=1;
1139
1140    /* set the section for the linker options */
1141    uprv_strncpy((char *)objHeader.sections[0].Name, ".drectve", 8);
1142    objHeader.sections[0].SizeOfRawData=length;
1143    objHeader.sections[0].PointerToRawData=IMAGE_SIZEOF_FILE_HEADER+2*IMAGE_SIZEOF_SECTION_HEADER;
1144    objHeader.sections[0].Characteristics=IMAGE_SCN_LNK_INFO|IMAGE_SCN_LNK_REMOVE|IMAGE_SCN_ALIGN_1BYTES;
1145
1146    /* set the data section */
1147    uprv_strncpy((char *)objHeader.sections[1].Name, ".rdata", 6);
1148    objHeader.sections[1].SizeOfRawData=size;
1149    objHeader.sections[1].PointerToRawData=IMAGE_SIZEOF_FILE_HEADER+2*IMAGE_SIZEOF_SECTION_HEADER+length;
1150    objHeader.sections[1].Characteristics=IMAGE_SCN_CNT_INITIALIZED_DATA|IMAGE_SCN_ALIGN_16BYTES|IMAGE_SCN_MEM_READ;
1151
1152    /* set the symbol table */
1153    if(entryLength<=8) {
1154        uprv_strncpy((char *)symbols[0].N.ShortName, entry, entryLength);
1155        symbolNames.sizeofLongNames=4;
1156    } else {
1157        symbols[0].N.Name.Short=0;
1158        symbols[0].N.Name.Long=4;
1159        symbolNames.sizeofLongNames=4+entryLength+1;
1160        uprv_strcpy(symbolNames.longNames, entry);
1161    }
1162    symbols[0].SectionNumber=2;
1163    symbols[0].StorageClass=IMAGE_SYM_CLASS_EXTERNAL;
1164
1165    /* write the file header and the linker options section */
1166    T_FileStream_write(out, &objHeader, objHeader.sections[1].PointerToRawData);
1167#else
1168#   error "Unknown platform for CAN_GENERATE_OBJECTS."
1169#endif
1170
1171    /* copy the data file into section 2 */
1172    for(;;) {
1173        length=T_FileStream_read(in, buffer, sizeof(buffer));
1174        if(length==0) {
1175            break;
1176        }
1177        T_FileStream_write(out, buffer, (int32_t)length);
1178    }
1179
1180#if U_PLATFORM_HAS_WIN32_API
1181    /* write the symbol table */
1182    T_FileStream_write(out, symbols, IMAGE_SIZEOF_SYMBOL);
1183    T_FileStream_write(out, &symbolNames, symbolNames.sizeofLongNames);
1184#endif
1185
1186    if(T_FileStream_error(in)) {
1187        fprintf(stderr, "genccode: file read error while generating from file %s\n", filename);
1188        exit(U_FILE_ACCESS_ERROR);
1189    }
1190
1191    if(T_FileStream_error(out)) {
1192        fprintf(stderr, "genccode: file write error while generating from file %s\n", filename);
1193        exit(U_FILE_ACCESS_ERROR);
1194    }
1195
1196    T_FileStream_close(out);
1197    T_FileStream_close(in);
1198}
1199#endif
1200