1/******************************************************************************
2 *   Copyright (C) 2009-2010, International Business Machines
3 *   Corporation and others.  All Rights Reserved.
4 *******************************************************************************
5 */
6#include "unicode/utypes.h"
7
8#ifdef U_WINDOWS
9#   define VC_EXTRALEAN
10#   define WIN32_LEAN_AND_MEAN
11#   define NOUSER
12#   define NOSERVICE
13#   define NOIME
14#   define NOMCX
15#include <windows.h>
16#include <time.h>
17#   ifdef __GNUC__
18#       define WINDOWS_WITH_GNUC
19#   endif
20#endif
21
22#ifdef U_LINUX
23#   define U_ELF
24#endif
25
26#ifdef U_ELF
27#   include <elf.h>
28#   if defined(ELFCLASS64)
29#       define U_ELF64
30#   endif
31    /* Old elf.h headers may not have EM_X86_64, or have EM_X8664 instead. */
32#   ifndef EM_X86_64
33#       define EM_X86_64 62
34#   endif
35#   define ICU_ENTRY_OFFSET 0
36#endif
37
38#include <stdio.h>
39#include <stdlib.h>
40#include "unicode/putil.h"
41#include "cmemory.h"
42#include "cstring.h"
43#include "filestrm.h"
44#include "toolutil.h"
45#include "unicode/uclean.h"
46#include "uoptions.h"
47#include "pkg_genc.h"
48
49#define MAX_COLUMN ((uint32_t)(0xFFFFFFFFU))
50
51#define HEX_0X 0 /*  0x1234 */
52#define HEX_0H 1 /*  01234h */
53
54#if defined(U_WINDOWS) || defined(U_ELF)
55#define CAN_GENERATE_OBJECTS
56#endif
57
58/* prototypes --------------------------------------------------------------- */
/*
 * Derives the output file path and the entry-point name from the input
 * file name. Results are written into outFilename and entryName; the
 * sizes of those buffers are not passed in.
 */
static void
getOutFilename(const char *inFilename, const char *destdir, char *outFilename, char *entryName, const char *newSuffix, const char *optFilename);

/* Writes one byte as a decimal array element; returns the updated column counter. */
static uint32_t
write8(FileStream *out, uint8_t byte, uint32_t column);

/*
 * Writes one 32-bit word as an assembler operand; returns the updated
 * column counter. (The second parameter is named "byte" here but carries
 * a full 32-bit value.)
 */
static uint32_t
write32(FileStream *out, uint32_t byte, uint32_t column);

#ifdef OS400
/* Writes one byte as a string-literal escape sequence; returns the updated column counter. */
static uint32_t
write8str(FileStream *out, uint8_t byte, uint32_t column);
#endif
72/* -------------------------------------------------------------------------- */
73
74/*
75Creating Template Files for New Platforms
76
77Let the cc compiler help you get started.
78Compile this program
79    const unsigned int x[5] = {1, 2, 0xdeadbeef, 0xffffffff, 16};
80with the -S option to produce assembly output.
81
82For example, this will generate array.s:
83gcc -S array.c
84
85This will produce a .s file that may look like this:
86
87    .file   "array.c"
88    .version        "01.01"
89gcc2_compiled.:
90    .globl x
91    .section        .rodata
92    .align 4
93    .type    x,@object
94    .size    x,20
95x:
96    .long   1
97    .long   2
98    .long   -559038737
99    .long   -1
100    .long   16
101    .ident  "GCC: (GNU) 2.96 20000731 (Red Hat Linux 7.1 2.96-85)"
102
103which gives a starting point that will compile, and can be transformed
104to become the template, generally with some consulting of as docs and
105some experimentation.
106
107If you want ICU to automatically use this assembly, you should
108specify "GENCCODE_ASSEMBLY=-a name" in the specific config/mh-* file,
109where the name is the compiler or platform that you used in this
110assemblyHeader data structure.
111*/
/*
 * Table of assembler dialects that writeAssemblyCode() can emit.
 * One entry is selected by name through checkAssemblyHeaderName().
 */
static const struct AssemblyType {
    /* Dialect name, compared against the requested assembly name. */
    const char *name;
    /*
     * sprintf() format written at the top of the output file. Every %s is
     * replaced by the entry-point name; the entries below use up to four.
     */
    const char *header;
    /* Directive that starts each line of 32-bit data values. */
    const char *beginLine;
    /* sprintf() format written after the data; %s is the entry-point name. */
    const char *footer;
    int8_t      hexType; /* HEX_0X or HEX_0H */
} assemblyHeader[] = {
    {"gcc",
        ".globl %s\n"
        "\t.section .note.GNU-stack,\"\",%%progbits\n"
        "\t.section .rodata\n"
        "\t.align 8\n" /* Either align 8 bytes or 2^8 (256) bytes. 8 bytes is needed. */
        "\t.type %s,%%object\n"
        "%s:\n\n",

        ".long ","",HEX_0X
    },
    {"gcc-darwin",
        /*"\t.section __TEXT,__text,regular,pure_instructions\n"
        "\t.section __TEXT,__picsymbolstub1,symbol_stubs,pure_instructions,32\n"*/
        ".globl _%s\n"
        "\t.data\n"
        "\t.const\n"
        "\t.align 4\n"  /* 1<<4 = 16 */
        "_%s:\n\n",

        ".long ","",HEX_0X
    },
    {"gcc-cygwin",
        ".globl _%s\n"
        "\t.section .rodata\n"
        "\t.align 8\n" /* Either align 8 bytes or 2^8 (256) bytes. 8 bytes is needed. */
        "_%s:\n\n",

        ".long ","",HEX_0X
    },
    {"sun",
        "\t.section \".rodata\"\n"
        "\t.align   8\n"
        ".globl     %s\n"
        "%s:\n",

        ".word ","",HEX_0X
    },
    {"sun-x86",
        "Drodata.rodata:\n"
        "\t.type   Drodata.rodata,@object\n"
        "\t.size   Drodata.rodata,0\n"
        "\t.globl  %s\n"
        "\t.align  8\n"
        "%s:\n",

        ".4byte ","",HEX_0X
    },
    {"xlc",
        ".globl %s{RO}\n"
        "\t.toc\n"
        "%s:\n"
        "\t.csect %s{RO}, 4\n",

        ".long ","",HEX_0X
    },
    {"aCC-ia64",
        "\t.file   \"%s.s\"\n"
        "\t.type   %s,@object\n"
        "\t.global %s\n"
        "\t.secalias .abe$0.rodata, \".rodata\"\n"
        "\t.section .abe$0.rodata = \"a\", \"progbits\"\n"
        "\t.align  16\n"
        "%s::\t",

        "data4 ","",HEX_0X
    },
    {"aCC-parisc",
        "\t.SPACE  $TEXT$\n"
        "\t.SUBSPA $LIT$\n"
        "%s\n"
        "\t.EXPORT %s\n"
        "\t.ALIGN  16\n",

        ".WORD ","",HEX_0X
    },
    /* The only entry with a non-empty footer and with HEX_0H notation. */
    { "masm",
      "\tTITLE %s\n"
      "; generated by genccode\n"
      ".386\n"
      ".model flat\n"
      "\tPUBLIC _%s\n"
      "ICUDATA_%s\tSEGMENT READONLY PARA PUBLIC FLAT 'DATA'\n"
      "\tALIGN 16\n"
      "_%s\tLABEL DWORD\n",
      "\tDWORD ","\nICUDATA_%s\tENDS\n\tEND\n",HEX_0H
    }
};
206
/* Index into assemblyHeader[] set by checkAssemblyHeaderName(); -1 while no name has matched. */
static int32_t assemblyHeaderIndex = -1;
/* Hex notation (HEX_0X or HEX_0H) used by write32(); copied from the selected assemblyHeader[] entry. */
static int32_t hexType = HEX_0X;
209
210U_CAPI UBool U_EXPORT2
211checkAssemblyHeaderName(const char* optAssembly) {
212    int32_t idx;
213    assemblyHeaderIndex = -1;
214    for (idx = 0; idx < (int32_t)(sizeof(assemblyHeader)/sizeof(assemblyHeader[0])); idx++) {
215        if (uprv_strcmp(optAssembly, assemblyHeader[idx].name) == 0) {
216            assemblyHeaderIndex = idx;
217            hexType = assemblyHeader[idx].hexType; /* set the hex type */
218            return TRUE;
219        }
220    }
221
222    return FALSE;
223}
224
225
226U_CAPI void U_EXPORT2
227printAssemblyHeadersToStdErr(void) {
228    int32_t idx;
229    fprintf(stderr, "%s", assemblyHeader[0].name);
230    for (idx = 1; idx < (int32_t)(sizeof(assemblyHeader)/sizeof(assemblyHeader[0])); idx++) {
231        fprintf(stderr, ", %s", assemblyHeader[idx].name);
232    }
233    fprintf(stderr,
234        ")\n");
235}
236
237U_CAPI void U_EXPORT2
238writeAssemblyCode(const char *filename, const char *destdir, const char *optEntryPoint, const char *optFilename, char *outFilePath) {
239    uint32_t column = MAX_COLUMN;
240    char entry[64];
241    uint32_t buffer[1024];
242    char *bufferStr = (char *)buffer;
243    FileStream *in, *out;
244    size_t i, length;
245
246    in=T_FileStream_open(filename, "rb");
247    if(in==NULL) {
248        fprintf(stderr, "genccode: unable to open input file %s\n", filename);
249        exit(U_FILE_ACCESS_ERROR);
250    }
251
252    getOutFilename(filename, destdir, bufferStr, entry, ".s", optFilename);
253    out=T_FileStream_open(bufferStr, "w");
254    if(out==NULL) {
255        fprintf(stderr, "genccode: unable to open output file %s\n", bufferStr);
256        exit(U_FILE_ACCESS_ERROR);
257    }
258
259    if (outFilePath != NULL) {
260        uprv_strcpy(outFilePath, bufferStr);
261    }
262
263#ifdef WINDOWS_WITH_GNUC
264    /* Need to fix the file seperator character when using MinGW. */
265    swapFileSepChar(outFilePath, U_FILE_SEP_CHAR, '/');
266#endif
267
268    if(optEntryPoint != NULL) {
269        uprv_strcpy(entry, optEntryPoint);
270        uprv_strcat(entry, "_dat");
271    }
272
273    /* turn dashes or dots in the entry name into underscores */
274    length=uprv_strlen(entry);
275    for(i=0; i<length; ++i) {
276        if(entry[i]=='-' || entry[i]=='.') {
277            entry[i]='_';
278        }
279    }
280
281    sprintf(bufferStr, assemblyHeader[assemblyHeaderIndex].header,
282        entry, entry, entry, entry,
283        entry, entry, entry, entry);
284    T_FileStream_writeLine(out, bufferStr);
285    T_FileStream_writeLine(out, assemblyHeader[assemblyHeaderIndex].beginLine);
286
287    for(;;) {
288        length=T_FileStream_read(in, buffer, sizeof(buffer));
289        if(length==0) {
290            break;
291        }
292        if (length != sizeof(buffer)) {
293            /* pad with extra 0's when at the end of the file */
294            for(i=0; i < (length % sizeof(uint32_t)); ++i) {
295                buffer[length+i] = 0;
296            }
297        }
298        for(i=0; i<(length/sizeof(buffer[0])); i++) {
299            column = write32(out, buffer[i], column);
300        }
301    }
302
303    T_FileStream_writeLine(out, "\n");
304
305    sprintf(bufferStr, assemblyHeader[assemblyHeaderIndex].footer,
306        entry, entry, entry, entry,
307        entry, entry, entry, entry);
308    T_FileStream_writeLine(out, bufferStr);
309
310    if(T_FileStream_error(in)) {
311        fprintf(stderr, "genccode: file read error while generating from file %s\n", filename);
312        exit(U_FILE_ACCESS_ERROR);
313    }
314
315    if(T_FileStream_error(out)) {
316        fprintf(stderr, "genccode: file write error while generating from file %s\n", filename);
317        exit(U_FILE_ACCESS_ERROR);
318    }
319
320    T_FileStream_close(out);
321    T_FileStream_close(in);
322}
323
324U_CAPI void U_EXPORT2
325writeCCode(const char *filename, const char *destdir, const char *optName, const char *optFilename, char *outFilePath) {
326    uint32_t column = MAX_COLUMN;
327    char buffer[4096], entry[64];
328    FileStream *in, *out;
329    size_t i, length;
330
331    in=T_FileStream_open(filename, "rb");
332    if(in==NULL) {
333        fprintf(stderr, "genccode: unable to open input file %s\n", filename);
334        exit(U_FILE_ACCESS_ERROR);
335    }
336
337    if(optName != NULL) { /* prepend  'icudt28_' */
338      strcpy(entry, optName);
339      strcat(entry, "_");
340    } else {
341      entry[0] = 0;
342    }
343
344    getOutFilename(filename, destdir, buffer, entry+uprv_strlen(entry), ".c", optFilename);
345    if (outFilePath != NULL) {
346        uprv_strcpy(outFilePath, buffer);
347    }
348    out=T_FileStream_open(buffer, "w");
349    if(out==NULL) {
350        fprintf(stderr, "genccode: unable to open output file %s\n", buffer);
351        exit(U_FILE_ACCESS_ERROR);
352    }
353
354    /* turn dashes or dots in the entry name into underscores */
355    length=uprv_strlen(entry);
356    for(i=0; i<length; ++i) {
357        if(entry[i]=='-' || entry[i]=='.') {
358            entry[i]='_';
359        }
360    }
361
362#ifdef OS400
363    /*
364    TODO: Fix this once the compiler implements this feature. Keep in sync with udatamem.c
365
366    This is here because this platform can't currently put
367    const data into the read-only pages of an object or
368    shared library (service program). Only strings are allowed in read-only
369    pages, so we use char * strings to store the data.
370
371    In order to prevent the beginning of the data from ever matching the
372    magic numbers we must still use the initial double.
373    [grhoten 4/24/2003]
374    */
375    sprintf(buffer,
376        "#define U_DISABLE_RENAMING 1\n"
377        "#include \"unicode/umachine.h\"\n"
378        "U_CDECL_BEGIN\n"
379        "const struct {\n"
380        "    double bogus;\n"
381        "    const char *bytes; \n"
382        "} %s={ 0.0, \n",
383        entry);
384    T_FileStream_writeLine(out, buffer);
385
386    for(;;) {
387        length=T_FileStream_read(in, buffer, sizeof(buffer));
388        if(length==0) {
389            break;
390        }
391        for(i=0; i<length; ++i) {
392            column = write8str(out, (uint8_t)buffer[i], column);
393        }
394    }
395
396    T_FileStream_writeLine(out, "\"\n};\nU_CDECL_END\n");
397#else
398    /* Function renaming shouldn't be done in data */
399    sprintf(buffer,
400        "#define U_DISABLE_RENAMING 1\n"
401        "#include \"unicode/umachine.h\"\n"
402        "U_CDECL_BEGIN\n"
403        "const struct {\n"
404        "    double bogus;\n"
405        "    uint8_t bytes[%ld]; \n"
406        "} %s={ 0.0, {\n",
407        (long)T_FileStream_size(in), entry);
408    T_FileStream_writeLine(out, buffer);
409
410    for(;;) {
411        length=T_FileStream_read(in, buffer, sizeof(buffer));
412        if(length==0) {
413            break;
414        }
415        for(i=0; i<length; ++i) {
416            column = write8(out, (uint8_t)buffer[i], column);
417        }
418    }
419
420    T_FileStream_writeLine(out, "\n}\n};\nU_CDECL_END\n");
421#endif
422
423    if(T_FileStream_error(in)) {
424        fprintf(stderr, "genccode: file read error while generating from file %s\n", filename);
425        exit(U_FILE_ACCESS_ERROR);
426    }
427
428    if(T_FileStream_error(out)) {
429        fprintf(stderr, "genccode: file write error while generating from file %s\n", filename);
430        exit(U_FILE_ACCESS_ERROR);
431    }
432
433    T_FileStream_close(out);
434    T_FileStream_close(in);
435}
436
437static uint32_t
438write32(FileStream *out, uint32_t bitField, uint32_t column) {
439    int32_t i;
440    char bitFieldStr[64]; /* This is more bits than needed for a 32-bit number */
441    char *s = bitFieldStr;
442    uint8_t *ptrIdx = (uint8_t *)&bitField;
443    static const char hexToStr[16] = {
444        '0','1','2','3',
445        '4','5','6','7',
446        '8','9','A','B',
447        'C','D','E','F'
448    };
449
450    /* write the value, possibly with comma and newline */
451    if(column==MAX_COLUMN) {
452        /* first byte */
453        column=1;
454    } else if(column<32) {
455        *(s++)=',';
456        ++column;
457    } else {
458        *(s++)='\n';
459        uprv_strcpy(s, assemblyHeader[assemblyHeaderIndex].beginLine);
460        s+=uprv_strlen(s);
461        column=1;
462    }
463
464    if (bitField < 10) {
465        /* It's a small number. Don't waste the space for 0x */
466        *(s++)=hexToStr[bitField];
467    }
468    else {
469        int seenNonZero = 0; /* This is used to remove leading zeros */
470
471        if(hexType==HEX_0X) {
472         *(s++)='0';
473         *(s++)='x';
474        } else if(hexType==HEX_0H) {
475         *(s++)='0';
476        }
477
478        /* This creates a 32-bit field */
479#if U_IS_BIG_ENDIAN
480        for (i = 0; i < sizeof(uint32_t); i++)
481#else
482        for (i = sizeof(uint32_t)-1; i >= 0 ; i--)
483#endif
484        {
485            uint8_t value = ptrIdx[i];
486            if (value || seenNonZero) {
487                *(s++)=hexToStr[value>>4];
488                *(s++)=hexToStr[value&0xF];
489                seenNonZero = 1;
490            }
491        }
492        if(hexType==HEX_0H) {
493         *(s++)='h';
494        }
495    }
496
497    *(s++)=0;
498    T_FileStream_writeLine(out, bitFieldStr);
499    return column;
500}
501
502static uint32_t
503write8(FileStream *out, uint8_t byte, uint32_t column) {
504    char s[4];
505    int i=0;
506
507    /* convert the byte value to a string */
508    if(byte>=100) {
509        s[i++]=(char)('0'+byte/100);
510        byte%=100;
511    }
512    if(i>0 || byte>=10) {
513        s[i++]=(char)('0'+byte/10);
514        byte%=10;
515    }
516    s[i++]=(char)('0'+byte);
517    s[i]=0;
518
519    /* write the value, possibly with comma and newline */
520    if(column==MAX_COLUMN) {
521        /* first byte */
522        column=1;
523    } else if(column<16) {
524        T_FileStream_writeLine(out, ",");
525        ++column;
526    } else {
527        T_FileStream_writeLine(out, ",\n");
528        column=1;
529    }
530    T_FileStream_writeLine(out, s);
531    return column;
532}
533
#ifdef OS400
/*
 * Writes one byte as an escape sequence inside a C string literal and
 * returns the updated column counter.
 *
 * Bytes 0..7 are written as a backslash plus one digit, all others as
 * "\x" plus upper-case hex. The first call (column == MAX_COLUMN) opens
 * the string literal; after 24 bytes the literal is closed and a new one
 * is opened on the next line.
 */
static uint32_t
write8str(FileStream *out, uint8_t byte, uint32_t column) {
    char escape[8];

    sprintf(escape, byte > 7 ? "\\x%X" : "\\%X", byte);

    if(column == MAX_COLUMN) {
        /* first byte: open the string literal */
        T_FileStream_writeLine(out, "\"");
        column = 1;
    } else if(column >= 24) {
        /* line is full: close this literal and open the next one */
        T_FileStream_writeLine(out, "\"\n\"");
        column = 1;
    } else {
        ++column;
    }

    T_FileStream_writeLine(out, escape);
    return column;
}
#endif
559
560static void
561getOutFilename(const char *inFilename, const char *destdir, char *outFilename, char *entryName, const char *newSuffix, const char *optFilename) {
562    const char *basename=findBasename(inFilename), *suffix=uprv_strrchr(basename, '.');
563
564    /* copy path */
565    if(destdir!=NULL && *destdir!=0) {
566        do {
567            *outFilename++=*destdir++;
568        } while(*destdir!=0);
569        if(*(outFilename-1)!=U_FILE_SEP_CHAR) {
570            *outFilename++=U_FILE_SEP_CHAR;
571        }
572        inFilename=basename;
573    } else {
574        while(inFilename<basename) {
575            *outFilename++=*inFilename++;
576        }
577    }
578
579    if(suffix==NULL) {
580        /* the filename does not have a suffix */
581        uprv_strcpy(entryName, inFilename);
582        if(optFilename != NULL) {
583          uprv_strcpy(outFilename, optFilename);
584        } else {
585          uprv_strcpy(outFilename, inFilename);
586        }
587        uprv_strcat(outFilename, newSuffix);
588    } else {
589        char *saveOutFilename = outFilename;
590        /* copy basename */
591        while(inFilename<suffix) {
592            if(*inFilename=='-') {
593                /* iSeries cannot have '-' in the .o objects. */
594                *outFilename++=*entryName++='_';
595                inFilename++;
596            }
597            else {
598                *outFilename++=*entryName++=*inFilename++;
599            }
600        }
601
602        /* replace '.' by '_' */
603        *outFilename++=*entryName++='_';
604        ++inFilename;
605
606        /* copy suffix */
607        while(*inFilename!=0) {
608            *outFilename++=*entryName++=*inFilename++;
609        }
610
611        *entryName=0;
612
613        if(optFilename != NULL) {
614            uprv_strcpy(saveOutFilename, optFilename);
615            uprv_strcat(saveOutFilename, newSuffix);
616        } else {
617            /* add ".c" */
618            uprv_strcpy(outFilename, newSuffix);
619        }
620    }
621}
622
623#ifdef CAN_GENERATE_OBJECTS
624static void
625getArchitecture(uint16_t *pCPU, uint16_t *pBits, UBool *pIsBigEndian, const char *optMatchArch) {
626    union {
627        char        bytes[2048];
628#ifdef U_ELF
629        Elf32_Ehdr  header32;
630        /* Elf32_Ehdr and ELF64_Ehdr are identical for the necessary fields. */
631#elif defined(U_WINDOWS)
632        IMAGE_FILE_HEADER header;
633#endif
634    } buffer;
635
636    const char *filename;
637    FileStream *in;
638    int32_t length;
639
640#ifdef U_ELF
641
642#elif defined(U_WINDOWS)
643    const IMAGE_FILE_HEADER *pHeader;
644#else
645#   error "Unknown platform for CAN_GENERATE_OBJECTS."
646#endif
647
648    if(optMatchArch != NULL) {
649        filename=optMatchArch;
650    } else {
651        /* set defaults */
652#ifdef U_ELF
653        /* set EM_386 because elf.h does not provide better defaults */
654        *pCPU=EM_386;
655        *pBits=32;
656        *pIsBigEndian=(UBool)(U_IS_BIG_ENDIAN ? ELFDATA2MSB : ELFDATA2LSB);
657#elif defined(U_WINDOWS)
658/* _M_IA64 should be defined in windows.h */
659#   if defined(_M_IA64)
660        *pCPU=IMAGE_FILE_MACHINE_IA64;
661#   elif defined(_M_AMD64)
662        *pCPU=IMAGE_FILE_MACHINE_AMD64;
663#   else
664        *pCPU=IMAGE_FILE_MACHINE_I386;
665#   endif
666        *pBits= *pCPU==IMAGE_FILE_MACHINE_I386 ? 32 : 64;
667        *pIsBigEndian=FALSE;
668#else
669#   error "Unknown platform for CAN_GENERATE_OBJECTS."
670#endif
671        return;
672    }
673
674    in=T_FileStream_open(filename, "rb");
675    if(in==NULL) {
676        fprintf(stderr, "genccode: unable to open match-arch file %s\n", filename);
677        exit(U_FILE_ACCESS_ERROR);
678    }
679    length=T_FileStream_read(in, buffer.bytes, sizeof(buffer.bytes));
680
681#ifdef U_ELF
682    if(length<sizeof(Elf32_Ehdr)) {
683        fprintf(stderr, "genccode: match-arch file %s is too short\n", filename);
684        exit(U_UNSUPPORTED_ERROR);
685    }
686    if(
687        buffer.header32.e_ident[0]!=ELFMAG0 ||
688        buffer.header32.e_ident[1]!=ELFMAG1 ||
689        buffer.header32.e_ident[2]!=ELFMAG2 ||
690        buffer.header32.e_ident[3]!=ELFMAG3 ||
691        buffer.header32.e_ident[EI_CLASS]<ELFCLASS32 || buffer.header32.e_ident[EI_CLASS]>ELFCLASS64
692    ) {
693        fprintf(stderr, "genccode: match-arch file %s is not an ELF object file, or not supported\n", filename);
694        exit(U_UNSUPPORTED_ERROR);
695    }
696
697    *pBits= buffer.header32.e_ident[EI_CLASS]==ELFCLASS32 ? 32 : 64; /* only 32 or 64: see check above */
698#ifdef U_ELF64
699    if(*pBits!=32 && *pBits!=64) {
700        fprintf(stderr, "genccode: currently only supports 32-bit and 64-bit ELF format\n");
701        exit(U_UNSUPPORTED_ERROR);
702    }
703#else
704    if(*pBits!=32) {
705        fprintf(stderr, "genccode: built with elf.h missing 64-bit definitions\n");
706        exit(U_UNSUPPORTED_ERROR);
707    }
708#endif
709
710    *pIsBigEndian=(UBool)(buffer.header32.e_ident[EI_DATA]==ELFDATA2MSB);
711    if(*pIsBigEndian!=U_IS_BIG_ENDIAN) {
712        fprintf(stderr, "genccode: currently only same-endianness ELF formats are supported\n");
713        exit(U_UNSUPPORTED_ERROR);
714    }
715    /* TODO: Support byte swapping */
716
717    *pCPU=buffer.header32.e_machine;
718#elif defined(U_WINDOWS)
719    if(length<sizeof(IMAGE_FILE_HEADER)) {
720        fprintf(stderr, "genccode: match-arch file %s is too short\n", filename);
721        exit(U_UNSUPPORTED_ERROR);
722    }
723    /* TODO: Use buffer.header.  Keep aliasing legal.  */
724    pHeader=(const IMAGE_FILE_HEADER *)buffer.bytes;
725    *pCPU=pHeader->Machine;
726    /*
727     * The number of bits is implicit with the Machine value.
728     * *pBits is ignored in the calling code, so this need not be precise.
729     */
730    *pBits= *pCPU==IMAGE_FILE_MACHINE_I386 ? 32 : 64;
731    /* Windows always runs on little-endian CPUs. */
732    *pIsBigEndian=FALSE;
733#else
734#   error "Unknown platform for CAN_GENERATE_OBJECTS."
735#endif
736
737    T_FileStream_close(in);
738}
739
740U_CAPI void U_EXPORT2
741writeObjectCode(const char *filename, const char *destdir, const char *optEntryPoint, const char *optMatchArch, const char *optFilename, char *outFilePath) {
742    /* common variables */
743    char buffer[4096], entry[40]={ 0 };
744    FileStream *in, *out;
745    const char *newSuffix;
746    int32_t i, entryLength, length, size, entryOffset=0, entryLengthOffset=0;
747
748    uint16_t cpu, bits;
749    UBool makeBigEndian;
750
751    /* platform-specific variables and initialization code */
752#ifdef U_ELF
753    /* 32-bit Elf file header */
754    static Elf32_Ehdr header32={
755        {
756            /* e_ident[] */
757            ELFMAG0, ELFMAG1, ELFMAG2, ELFMAG3,
758            ELFCLASS32,
759            U_IS_BIG_ENDIAN ? ELFDATA2MSB : ELFDATA2LSB,
760            EV_CURRENT /* EI_VERSION */
761        },
762        ET_REL,
763        EM_386,
764        EV_CURRENT, /* e_version */
765        0, /* e_entry */
766        0, /* e_phoff */
767        (Elf32_Off)sizeof(Elf32_Ehdr), /* e_shoff */
768        0, /* e_flags */
769        (Elf32_Half)sizeof(Elf32_Ehdr), /* eh_size */
770        0, /* e_phentsize */
771        0, /* e_phnum */
772        (Elf32_Half)sizeof(Elf32_Shdr), /* e_shentsize */
773        5, /* e_shnum */
774        2 /* e_shstrndx */
775    };
776
777    /* 32-bit Elf section header table */
778    static Elf32_Shdr sectionHeaders32[5]={
779        { /* SHN_UNDEF */
780            0
781        },
782        { /* .symtab */
783            1, /* sh_name */
784            SHT_SYMTAB,
785            0, /* sh_flags */
786            0, /* sh_addr */
787            (Elf32_Off)(sizeof(header32)+sizeof(sectionHeaders32)), /* sh_offset */
788            (Elf32_Word)(2*sizeof(Elf32_Sym)), /* sh_size */
789            3, /* sh_link=sect hdr index of .strtab */
790            1, /* sh_info=One greater than the symbol table index of the last
791                * local symbol (with STB_LOCAL). */
792            4, /* sh_addralign */
793            (Elf32_Word)(sizeof(Elf32_Sym)) /* sh_entsize */
794        },
795        { /* .shstrtab */
796            9, /* sh_name */
797            SHT_STRTAB,
798            0, /* sh_flags */
799            0, /* sh_addr */
800            (Elf32_Off)(sizeof(header32)+sizeof(sectionHeaders32)+2*sizeof(Elf32_Sym)), /* sh_offset */
801            40, /* sh_size */
802            0, /* sh_link */
803            0, /* sh_info */
804            1, /* sh_addralign */
805            0 /* sh_entsize */
806        },
807        { /* .strtab */
808            19, /* sh_name */
809            SHT_STRTAB,
810            0, /* sh_flags */
811            0, /* sh_addr */
812            (Elf32_Off)(sizeof(header32)+sizeof(sectionHeaders32)+2*sizeof(Elf32_Sym)+40), /* sh_offset */
813            (Elf32_Word)sizeof(entry), /* sh_size */
814            0, /* sh_link */
815            0, /* sh_info */
816            1, /* sh_addralign */
817            0 /* sh_entsize */
818        },
819        { /* .rodata */
820            27, /* sh_name */
821            SHT_PROGBITS,
822            SHF_ALLOC, /* sh_flags */
823            0, /* sh_addr */
824            (Elf32_Off)(sizeof(header32)+sizeof(sectionHeaders32)+2*sizeof(Elf32_Sym)+40+sizeof(entry)), /* sh_offset */
825            0, /* sh_size */
826            0, /* sh_link */
827            0, /* sh_info */
828            16, /* sh_addralign */
829            0 /* sh_entsize */
830        }
831    };
832
833    /* symbol table */
834    static Elf32_Sym symbols32[2]={
835        { /* STN_UNDEF */
836            0
837        },
838        { /* data entry point */
839            1, /* st_name */
840            0, /* st_value */
841            0, /* st_size */
842            ELF64_ST_INFO(STB_GLOBAL, STT_OBJECT),
843            0, /* st_other */
844            4 /* st_shndx=index of related section table entry */
845        }
846    };
847
848    /* section header string table, with decimal string offsets */
849    static const char sectionStrings[40]=
850        /*  0 */ "\0"
851        /*  1 */ ".symtab\0"
852        /*  9 */ ".shstrtab\0"
853        /* 19 */ ".strtab\0"
854        /* 27 */ ".rodata\0"
855        /* 35 */ "\0\0\0\0"; /* contains terminating NUL */
856        /* 40: padded to multiple of 8 bytes */
857
858    /*
859     * Use entry[] for the string table which will contain only the
860     * entry point name.
861     * entry[0] must be 0 (NUL)
862     * The entry point name can be up to 38 characters long (sizeof(entry)-2).
863     */
864
865    /* 16-align .rodata in the .o file, just in case */
866    static const char padding[16]={ 0 };
867    int32_t paddingSize;
868
869#ifdef U_ELF64
870    /* 64-bit Elf file header */
871    static Elf64_Ehdr header64={
872        {
873            /* e_ident[] */
874            ELFMAG0, ELFMAG1, ELFMAG2, ELFMAG3,
875            ELFCLASS64,
876            U_IS_BIG_ENDIAN ? ELFDATA2MSB : ELFDATA2LSB,
877            EV_CURRENT /* EI_VERSION */
878        },
879        ET_REL,
880        EM_X86_64,
881        EV_CURRENT, /* e_version */
882        0, /* e_entry */
883        0, /* e_phoff */
884        (Elf64_Off)sizeof(Elf64_Ehdr), /* e_shoff */
885        0, /* e_flags */
886        (Elf64_Half)sizeof(Elf64_Ehdr), /* eh_size */
887        0, /* e_phentsize */
888        0, /* e_phnum */
889        (Elf64_Half)sizeof(Elf64_Shdr), /* e_shentsize */
890        5, /* e_shnum */
891        2 /* e_shstrndx */
892    };
893
894    /* 64-bit Elf section header table */
895    static Elf64_Shdr sectionHeaders64[5]={
896        { /* SHN_UNDEF */
897            0
898        },
899        { /* .symtab */
900            1, /* sh_name */
901            SHT_SYMTAB,
902            0, /* sh_flags */
903            0, /* sh_addr */
904            (Elf64_Off)(sizeof(header64)+sizeof(sectionHeaders64)), /* sh_offset */
905            (Elf64_Xword)(2*sizeof(Elf64_Sym)), /* sh_size */
906            3, /* sh_link=sect hdr index of .strtab */
907            1, /* sh_info=One greater than the symbol table index of the last
908                * local symbol (with STB_LOCAL). */
909            4, /* sh_addralign */
910            (Elf64_Xword)(sizeof(Elf64_Sym)) /* sh_entsize */
911        },
912        { /* .shstrtab */
913            9, /* sh_name */
914            SHT_STRTAB,
915            0, /* sh_flags */
916            0, /* sh_addr */
917            (Elf64_Off)(sizeof(header64)+sizeof(sectionHeaders64)+2*sizeof(Elf64_Sym)), /* sh_offset */
918            40, /* sh_size */
919            0, /* sh_link */
920            0, /* sh_info */
921            1, /* sh_addralign */
922            0 /* sh_entsize */
923        },
924        { /* .strtab */
925            19, /* sh_name */
926            SHT_STRTAB,
927            0, /* sh_flags */
928            0, /* sh_addr */
929            (Elf64_Off)(sizeof(header64)+sizeof(sectionHeaders64)+2*sizeof(Elf64_Sym)+40), /* sh_offset */
930            (Elf64_Xword)sizeof(entry), /* sh_size */
931            0, /* sh_link */
932            0, /* sh_info */
933            1, /* sh_addralign */
934            0 /* sh_entsize */
935        },
936        { /* .rodata */
937            27, /* sh_name */
938            SHT_PROGBITS,
939            SHF_ALLOC, /* sh_flags */
940            0, /* sh_addr */
941            (Elf64_Off)(sizeof(header64)+sizeof(sectionHeaders64)+2*sizeof(Elf64_Sym)+40+sizeof(entry)), /* sh_offset */
942            0, /* sh_size */
943            0, /* sh_link */
944            0, /* sh_info */
945            16, /* sh_addralign */
946            0 /* sh_entsize */
947        }
948    };
949
950    /*
951     * 64-bit symbol table
952     * careful: different order of items compared with Elf32_sym!
953     */
954    static Elf64_Sym symbols64[2]={
955        { /* STN_UNDEF */
956            0
957        },
958        { /* data entry point */
959            1, /* st_name */
960            ELF64_ST_INFO(STB_GLOBAL, STT_OBJECT),
961            0, /* st_other */
962            4, /* st_shndx=index of related section table entry */
963            0, /* st_value */
964            0 /* st_size */
965        }
966    };
967
968#endif /* U_ELF64 */
969
970    /* entry[] have a leading NUL */
971    entryOffset=1;
972
973    /* in the common code, count entryLength from after the NUL */
974    entryLengthOffset=1;
975
976    newSuffix=".o";
977
978#elif defined(U_WINDOWS)
979    struct {
980        IMAGE_FILE_HEADER fileHeader;
981        IMAGE_SECTION_HEADER sections[2];
982        char linkerOptions[100];
983    } objHeader;
984    IMAGE_SYMBOL symbols[1];
985    struct {
986        DWORD sizeofLongNames;
987        char longNames[100];
988    } symbolNames;
989
990    /*
991     * entry sometimes have a leading '_'
992     * overwritten if entryOffset==0 depending on the target platform
993     * see check for cpu below
994     */
995    entry[0]='_';
996
997    newSuffix=".obj";
998#else
999#   error "Unknown platform for CAN_GENERATE_OBJECTS."
1000#endif
1001
1002    /* deal with options, files and the entry point name */
1003    getArchitecture(&cpu, &bits, &makeBigEndian, optMatchArch);
1004    printf("genccode: --match-arch cpu=%hu bits=%hu big-endian=%hu\n", cpu, bits, makeBigEndian);
1005#ifdef U_WINDOWS
1006    if(cpu==IMAGE_FILE_MACHINE_I386) {
1007        entryOffset=1;
1008    }
1009#endif
1010
1011    in=T_FileStream_open(filename, "rb");
1012    if(in==NULL) {
1013        fprintf(stderr, "genccode: unable to open input file %s\n", filename);
1014        exit(U_FILE_ACCESS_ERROR);
1015    }
1016    size=T_FileStream_size(in);
1017
1018    getOutFilename(filename, destdir, buffer, entry+entryOffset, newSuffix, optFilename);
1019    if (outFilePath != NULL) {
1020        uprv_strcpy(outFilePath, buffer);
1021    }
1022
1023    if(optEntryPoint != NULL) {
1024        uprv_strcpy(entry+entryOffset, optEntryPoint);
1025        uprv_strcat(entry+entryOffset, "_dat");
1026    }
1027    /* turn dashes in the entry name into underscores */
1028    entryLength=(int32_t)uprv_strlen(entry+entryLengthOffset);
1029    for(i=0; i<entryLength; ++i) {
1030        if(entry[entryLengthOffset+i]=='-') {
1031            entry[entryLengthOffset+i]='_';
1032        }
1033    }
1034
1035    /* open the output file */
1036    out=T_FileStream_open(buffer, "wb");
1037    if(out==NULL) {
1038        fprintf(stderr, "genccode: unable to open output file %s\n", buffer);
1039        exit(U_FILE_ACCESS_ERROR);
1040    }
1041
1042#ifdef U_ELF
1043    if(bits==32) {
1044        header32.e_ident[EI_DATA]= makeBigEndian ? ELFDATA2MSB : ELFDATA2LSB;
1045        header32.e_machine=cpu;
1046
1047        /* 16-align .rodata in the .o file, just in case */
1048        paddingSize=sectionHeaders32[4].sh_offset & 0xf;
1049        if(paddingSize!=0) {
1050                paddingSize=0x10-paddingSize;
1051                sectionHeaders32[4].sh_offset+=paddingSize;
1052        }
1053
1054        sectionHeaders32[4].sh_size=(Elf32_Word)size;
1055
1056        symbols32[1].st_size=(Elf32_Word)size;
1057
1058        /* write .o headers */
1059        T_FileStream_write(out, &header32, (int32_t)sizeof(header32));
1060        T_FileStream_write(out, sectionHeaders32, (int32_t)sizeof(sectionHeaders32));
1061        T_FileStream_write(out, symbols32, (int32_t)sizeof(symbols32));
1062    } else /* bits==64 */ {
1063#ifdef U_ELF64
1064        header64.e_ident[EI_DATA]= makeBigEndian ? ELFDATA2MSB : ELFDATA2LSB;
1065        header64.e_machine=cpu;
1066
1067        /* 16-align .rodata in the .o file, just in case */
1068        paddingSize=sectionHeaders64[4].sh_offset & 0xf;
1069        if(paddingSize!=0) {
1070                paddingSize=0x10-paddingSize;
1071                sectionHeaders64[4].sh_offset+=paddingSize;
1072        }
1073
1074        sectionHeaders64[4].sh_size=(Elf64_Xword)size;
1075
1076        symbols64[1].st_size=(Elf64_Xword)size;
1077
1078        /* write .o headers */
1079        T_FileStream_write(out, &header64, (int32_t)sizeof(header64));
1080        T_FileStream_write(out, sectionHeaders64, (int32_t)sizeof(sectionHeaders64));
1081        T_FileStream_write(out, symbols64, (int32_t)sizeof(symbols64));
1082#endif
1083    }
1084
1085    T_FileStream_write(out, sectionStrings, (int32_t)sizeof(sectionStrings));
1086    T_FileStream_write(out, entry, (int32_t)sizeof(entry));
1087    if(paddingSize!=0) {
1088        T_FileStream_write(out, padding, paddingSize);
1089    }
1090#elif defined(U_WINDOWS)
1091    /* populate the .obj headers */
1092    uprv_memset(&objHeader, 0, sizeof(objHeader));
1093    uprv_memset(&symbols, 0, sizeof(symbols));
1094    uprv_memset(&symbolNames, 0, sizeof(symbolNames));
1095
1096    /* write the linker export directive */
1097    uprv_strcpy(objHeader.linkerOptions, "-export:");
1098    length=8;
1099    uprv_strcpy(objHeader.linkerOptions+length, entry);
1100    length+=entryLength;
1101    uprv_strcpy(objHeader.linkerOptions+length, ",data ");
1102    length+=6;
1103
1104    /* set the file header */
1105    objHeader.fileHeader.Machine=cpu;
1106    objHeader.fileHeader.NumberOfSections=2;
1107    objHeader.fileHeader.TimeDateStamp=(DWORD)time(NULL);
1108    objHeader.fileHeader.PointerToSymbolTable=IMAGE_SIZEOF_FILE_HEADER+2*IMAGE_SIZEOF_SECTION_HEADER+length+size; /* start of symbol table */
1109    objHeader.fileHeader.NumberOfSymbols=1;
1110
1111    /* set the section for the linker options */
1112    uprv_strncpy((char *)objHeader.sections[0].Name, ".drectve", 8);
1113    objHeader.sections[0].SizeOfRawData=length;
1114    objHeader.sections[0].PointerToRawData=IMAGE_SIZEOF_FILE_HEADER+2*IMAGE_SIZEOF_SECTION_HEADER;
1115    objHeader.sections[0].Characteristics=IMAGE_SCN_LNK_INFO|IMAGE_SCN_LNK_REMOVE|IMAGE_SCN_ALIGN_1BYTES;
1116
1117    /* set the data section */
1118    uprv_strncpy((char *)objHeader.sections[1].Name, ".rdata", 6);
1119    objHeader.sections[1].SizeOfRawData=size;
1120    objHeader.sections[1].PointerToRawData=IMAGE_SIZEOF_FILE_HEADER+2*IMAGE_SIZEOF_SECTION_HEADER+length;
1121    objHeader.sections[1].Characteristics=IMAGE_SCN_CNT_INITIALIZED_DATA|IMAGE_SCN_ALIGN_16BYTES|IMAGE_SCN_MEM_READ;
1122
1123    /* set the symbol table */
1124    if(entryLength<=8) {
1125        uprv_strncpy((char *)symbols[0].N.ShortName, entry, entryLength);
1126        symbolNames.sizeofLongNames=4;
1127    } else {
1128        symbols[0].N.Name.Short=0;
1129        symbols[0].N.Name.Long=4;
1130        symbolNames.sizeofLongNames=4+entryLength+1;
1131        uprv_strcpy(symbolNames.longNames, entry);
1132    }
1133    symbols[0].SectionNumber=2;
1134    symbols[0].StorageClass=IMAGE_SYM_CLASS_EXTERNAL;
1135
1136    /* write the file header and the linker options section */
1137    T_FileStream_write(out, &objHeader, objHeader.sections[1].PointerToRawData);
1138#else
1139#   error "Unknown platform for CAN_GENERATE_OBJECTS."
1140#endif
1141
1142    /* copy the data file into section 2 */
1143    for(;;) {
1144        length=T_FileStream_read(in, buffer, sizeof(buffer));
1145        if(length==0) {
1146            break;
1147        }
1148        T_FileStream_write(out, buffer, (int32_t)length);
1149    }
1150
1151#ifdef U_WINDOWS
1152    /* write the symbol table */
1153    T_FileStream_write(out, symbols, IMAGE_SIZEOF_SYMBOL);
1154    T_FileStream_write(out, &symbolNames, symbolNames.sizeofLongNames);
1155#endif
1156
1157    if(T_FileStream_error(in)) {
1158        fprintf(stderr, "genccode: file read error while generating from file %s\n", filename);
1159        exit(U_FILE_ACCESS_ERROR);
1160    }
1161
1162    if(T_FileStream_error(out)) {
1163        fprintf(stderr, "genccode: file write error while generating from file %s\n", filename);
1164        exit(U_FILE_ACCESS_ERROR);
1165    }
1166
1167    T_FileStream_close(out);
1168    T_FileStream_close(in);
1169}
1170#endif
1171