acc.cpp revision 69796b6c847dd15bdbc1d0a563e421562c2c56f3
1/*
2 Obfuscated Tiny C Compiler
3
4 Copyright (C) 2001-2003 Fabrice Bellard
5
6 This software is provided 'as-is', without any express or implied
7 warranty.  In no event will the authors be held liable for any damages
8 arising from the use of this software.
9
10 Permission is granted to anyone to use this software for any purpose,
11 including commercial applications, and to alter it and redistribute it
12 freely, subject to the following restrictions:
13
14 1. The origin of this software must not be misrepresented; you must not
15 claim that you wrote the original software. If you use this software
16 in a product, an acknowledgment in the product and its documentation
17 *is* required.
18 2. Altered source versions must be plainly marked as such, and must not be
19 misrepresented as being the original software.
20 3. This notice may not be removed or altered from any source distribution.
21 */
22
23#include <ctype.h>
24#include <dlfcn.h>
25#include <stdarg.h>
26#include <stdio.h>
27#include <stdlib.h>
28#include <string.h>
29
30#if defined(__arm__)
31#include <unistd.h>
32#endif
33
34#include "disassem.h"
35
36namespace acc {
37
38class compiler {
39    class CodeBuf {
40        char* ind;
41        char* pProgramBase;
42
43        void release() {
44            if (pProgramBase != 0) {
45                free(pProgramBase);
46                pProgramBase = 0;
47            }
48        }
49
50    public:
51        CodeBuf() {
52            pProgramBase = 0;
53            ind = 0;
54        }
55
56        ~CodeBuf() {
57            release();
58        }
59
60        void init(int size) {
61            release();
62            pProgramBase = (char*) calloc(1, size);
63            ind = pProgramBase;
64        }
65
66        void o(int n) {
67            /* cannot use unsigned, so we must do a hack */
68            while (n && n != -1) {
69                *ind++ = n;
70                n = n >> 8;
71            }
72        }
73
74        int o4(int n) {
75            int result = (int) ind;
76            * (int*) ind = n;
77            ind += 4;
78            return result;
79        }
80
81        /*
82         * Output a byte. Handles all values, 0..ff.
83         */
84        void ob(int n) {
85            *ind++ = n;
86        }
87
88        /* output a symbol and patch all calls to it */
89        void gsym(int t) {
90            int n;
91            while (t) {
92                n = *(int *) t; /* next value */
93                *(int *) t = ((int) ind) - t - 4;
94                t = n;
95            }
96        }
97
98        /* psym is used to put an instruction with a data field which is a
99         reference to a symbol. It is in fact the same as oad ! */
100        int psym(int n, int t) {
101            return oad(n, t);
102        }
103
104        /* instruction + address */
105        int oad(int n, int t) {
106            o(n);
107            *(int *) ind = t;
108            t = (int) ind;
109            ind = ind + 4;
110            return t;
111        }
112
113        inline void* getBase() {
114            return (void*) pProgramBase;
115        }
116
117        int getSize() {
118            return ind - pProgramBase;
119        }
120
121        int getPC() {
122            return (int) ind;
123        }
124    };
125
126    class CodeGenerator {
127    public:
128        CodeGenerator() {}
129        virtual ~CodeGenerator() {}
130
131        virtual void init(CodeBuf* pCodeBuf) {
132            this->pCodeBuf = pCodeBuf;
133        }
134
135        /* returns address to patch with local variable size
136        */
137        virtual int functionEntry(int argCount) = 0;
138
139        virtual void functionExit(int argCount, int localVariableAddress, int localVariableSize) = 0;
140
141        /* load immediate value */
142        virtual void li(int t) = 0;
143
144        virtual int gjmp(int t) = 0;
145
146        /* l = 0: je, l == 1: jne */
147        virtual int gtst(bool l, int t) = 0;
148
149        virtual void gcmp(int op) = 0;
150
151        virtual void genOp(int op) = 0;
152
153        virtual void clearECX() = 0;
154
155        virtual void pushEAX() = 0;
156
157        virtual void popECX() = 0;
158
159        virtual void storeEAXToAddressECX(bool isInt) = 0;
160
161        virtual void loadEAXIndirect(bool isInt) = 0;
162
163        virtual void leaEAX(int ea) = 0;
164
165        virtual void storeEAX(int ea) = 0;
166
167        virtual void loadEAX(int ea) = 0;
168
169        virtual void postIncrementOrDecrement(int n, int op) = 0;
170
171        virtual int beginFunctionCallArguments() = 0;
172
173        virtual void endFunctionCallArguments(int a, int l) = 0;
174
175        virtual void storeEAToArg(int l) = 0;
176
177        virtual int callForward(int symbol) = 0;
178
179        virtual void callRelative(int t) = 0;
180
181        virtual void callIndirect(int l) = 0;
182
183        virtual void adjustStackAfterCall(int l) = 0;
184
185        virtual int disassemble(FILE* out) = 0;
186
187        /* output a symbol and patch all calls to it */
188        virtual void gsym(int t) {
189            pCodeBuf->gsym(t);
190        }
191
192        virtual int finishCompile() {
193#if defined(__arm__)
194            const long base = long(pCodeBuf->getBase());
195            const long curr = base + long(pCodeBuf->getSize());
196            int err = cacheflush(base, curr, 0);
197            return err;
198#else
199            return 0;
200#endif
201        }
202
203        /**
204         * Adjust relative branches by this amount.
205         */
206        virtual int jumpOffset() = 0;
207
208    protected:
209        void o(int n) {
210            pCodeBuf->o(n);
211        }
212
213        /*
214         * Output a byte. Handles all values, 0..ff.
215         */
216        void ob(int n) {
217            pCodeBuf->ob(n);
218        }
219
220        /* psym is used to put an instruction with a data field which is a
221         reference to a symbol. It is in fact the same as oad ! */
222        int psym(int n, int t) {
223            return oad(n, t);
224        }
225
226        /* instruction + address */
227        int oad(int n, int t) {
228            return pCodeBuf->oad(n,t);
229        }
230
231        int getBase() {
232            return (int) pCodeBuf->getBase();
233        }
234
235        int getPC() {
236            return pCodeBuf->getPC();
237        }
238
239        int o4(int data) {
240            return pCodeBuf->o4(data);
241        }
242    private:
243        CodeBuf* pCodeBuf;
244    };
245
246    class ARMCodeGenerator : public CodeGenerator {
247    public:
248        ARMCodeGenerator() {}
249        virtual ~ARMCodeGenerator() {}
250
251        /* returns address to patch with local variable size
252        */
253        virtual int functionEntry(int argCount) {
254            fprintf(stderr, "functionEntry(%d);\n", argCount);
255            // sp -> arg4 arg5 ...
256            // Push our register-based arguments back on the stack
257            if (argCount > 0) {
258                int regArgCount = argCount <= 4 ? argCount : 4;
259                o4(0xE92D0000 | ((1 << argCount) - 1)); // stmfd    sp!, {}
260            }
261            // sp -> arg0 arg1 ...
262            o4(0xE92D4800); // stmfd sp!, {fp, lr}
263            // sp, fp -> oldfp, retadr, arg0 arg1 ....
264            o4(0xE1A0B00D); // mov    fp, sp
265            return o4(0xE24DD000); // sub    sp, sp, # <local variables>
266        }
267
268        virtual void functionExit(int argCount, int localVariableAddress, int localVariableSize) {
269            fprintf(stderr, "functionExit(%d, %d, %d);\n", argCount, localVariableAddress, localVariableSize);
270            // Patch local variable allocation code:
271            if (localVariableSize < 0 || localVariableSize > 255) {
272                error("LocalVariableSize");
273            }
274            *(char*) (localVariableAddress) = localVariableSize;
275
276            // sp -> locals .... fp -> oldfp, retadr, arg0, arg1, ...
277            o4(0xE1A0E00B); // mov lr, fp
278            o4(0xE59BB000); // ldr fp, [fp]
279            o4(0xE28ED004); // add sp, lr, #4
280            // sp -> retadr, arg0, ...
281            o4(0xE8BD4000); // ldmfd    sp!, {lr}
282            // sp -> arg0 ....
283            if (argCount > 0) {
284                // We store the PC into the lr so we can adjust the sp before
285                // returning. (We need to pull off the registers we pushed
286                // earlier. We don't need to actually store them anywhere,
287                // just adjust the stack.
288                int regArgCount = argCount <= 4 ? argCount : 4;
289                o4(0xE28DD000 | (regArgCount << 2)); // add sp, sp, #argCount << 2
290            }
291            o4(0xE12FFF1E); // bx lr
292        }
293
294        /* load immediate value */
295        virtual void li(int t) {
296            fprintf(stderr, "li(%d);\n", t);
297            if (t >= 0 && t < 255) {
298                 o4(0xE3A00000 + t); // mov    r0, #0
299            } else if (t >= -256 && t < 0) {
300                // mvn means move constant ^ ~0
301                o4(0xE3E00001 - t); // mvn    r0, #0
302            } else {
303                  o4(0xE51F0000); //         ldr    r0, .L3
304                  o4(0xEA000000); //         b .L99
305                  o4(t);          // .L3:   .word 0
306                                  // .L99:
307            }
308        }
309
310        virtual int gjmp(int t) {
311            fprintf(stderr, "gjmp(%d);\n", t);
312            return o4(0xEA000000 + encodeAddress(t)); // b .L33
313        }
314
315        /* l = 0: je, l == 1: jne */
316        virtual int gtst(bool l, int t) {
317            fprintf(stderr, "gtst(%d, %d);\n", l, t);
318            error("Unimplemented");
319            o(0x0fc085); /* test %eax, %eax, je/jne xxx */
320            return psym(0x84 + l, t);
321        }
322
323        virtual void gcmp(int op) {
324            fprintf(stderr, "gcmp(%d);\n", op);
325            error("Unimplemented");
326#if 0
327            int t = decodeOp(op);
328            o(0xc139); /* cmp %eax,%ecx */
329            li(0);
330            o(0x0f); /* setxx %al */
331            o(t + 0x90);
332            o(0xc0);
333#endif
334        }
335
336        virtual void genOp(int op) {
337            fprintf(stderr, "genOp(%d);\n", op);
338            switch(op) {
339            case OP_MUL:
340                o4(0x0E0000091); // mul     r0,r1,r0
341                break;
342            case OP_PLUS:
343                o4(0xE0810000);  // add     r0,r1,r0
344                break;
345            case OP_MINUS:
346                o4(0xE0410000);  // sub     r0,r1,r0
347                break;
348            case OP_SHIFT_LEFT:
349                o4(0xE1A00011);  // lsl     r0,r1,r0
350                break;
351            case OP_SHIFT_RIGHT:
352                o4(0xE1A00051);  // asr     r0,r1,r0
353                break;
354            case OP_BIT_AND:
355                o4(0xE0010000);  // and     r0,r1,r0
356                break;
357            case OP_BIT_XOR:
358                o4(0xE0210000);  // eor     r0,r1,r0
359                break;
360            case OP_BIT_OR:
361                o4(0xE1810000);  // orr     r0,r1,r0
362                break;
363            case OP_BIT_NOT:
364                o4(0xE1E00000);  // mvn     r0, r0
365                break;
366            default:
367                error("Unimplemented op %d\n", op);
368                break;
369            }
370#if 0
371            o(decodeOp(op));
372            if (op == OP_MOD)
373                o(0x92); /* xchg %edx, %eax */
374#endif
375        }
376
377        virtual void clearECX() {
378            fprintf(stderr, "clearECX();\n");
379            o4(0xE3A01000);  // mov    r1, #0
380        }
381
382        virtual void pushEAX() {
383            fprintf(stderr, "pushEAX();\n");
384            o4(0xE92D0001);  // stmfd   sp!,{r0}
385        }
386
387        virtual void popECX() {
388            fprintf(stderr, "popECX();\n");
389            o4(0xE8BD0002);  // ldmfd   sp!,{r1}
390        }
391
392        virtual void storeEAXToAddressECX(bool isInt) {
393            fprintf(stderr, "storeEAXToAddressECX(%d);\n", isInt);
394            o4(0x0188 + isInt); /* movl %eax/%al, (%ecx) */
395        }
396
397        virtual void loadEAXIndirect(bool isInt) {
398            fprintf(stderr, "loadEAXIndirect(%d);\n", isInt);
399            if (isInt)
400                o4(0xE5900000); // ldr r0, [r0]
401            else
402                o4(0xE5D00000); // ldrb r0, [r0]
403        }
404
405        virtual void leaEAX(int ea) {
406            fprintf(stderr, "[!!! fixme !!!] leaEAX(%d);\n", ea);
407            error("Unimplemented");
408            if (ea < -4095 || ea > 4095) {
409                error("Offset out of range: %08x", ea);
410            }
411            o4(0xE59B0000 | (0x1fff & ea)); //ldr r0, [fp,#ea]
412        }
413
414        virtual void storeEAX(int ea) {
415            fprintf(stderr, "storeEAX(%d);\n", ea);
416            int fpOffset = ea;
417            if (fpOffset < -4095 || fpOffset > 4095) {
418                error("Offset out of range: %08x", ea);
419            }
420            if (fpOffset < 0) {
421                o4(0xE50B0000 | (0xfff & (-fpOffset))); // str r0, [fp,#-ea]
422            } else {
423                o4(0xE58B0000 | (0xfff & fpOffset)); // str r0, [fp,#ea]
424            }
425        }
426
427        virtual void loadEAX(int ea) {
428            fprintf(stderr, "loadEAX(%d);\n", ea);
429            int fpOffset = ea;
430            if (fpOffset < -4095 || fpOffset > 4095) {
431                error("Offset out of range: %08x", ea);
432            }
433            if (fpOffset < 0) {
434                o4(0xE51B0000 | (0xfff & (-fpOffset))); // ldr r0, [fp,#-ea]
435            } else {
436                o4(0xE59B0000 | (0xfff & fpOffset)); //ldr r0, [fp,#ea]
437            }
438        }
439
440        virtual void postIncrementOrDecrement(int n, int op) {
441            fprintf(stderr, "postIncrementOrDecrement(%d, %d);\n", n, op);
442            /* Implement post-increment or post decrement.
443             */
444
445            error("Unimplemented");
446#if 0
447            gmov(0, n); /* 83 ADD */
448            o(decodeOp(op));
449#endif
450        }
451
452        virtual int beginFunctionCallArguments() {
453            fprintf(stderr, "beginFunctionCallArguments();\n");
454            return o4(0xE24DDF00); // Placeholder
455        }
456
457        virtual void endFunctionCallArguments(int a, int l) {
458            fprintf(stderr, "endFunctionCallArguments(0x%08x, %d);\n", a, l);
459            if (l < 0 || l > 0x3FC) {
460                error("L out of range for stack adjustment: 0x%08x", l);
461            }
462            * (int*) a = 0xE24DDF00 | (l >> 2); // sub    sp, sp, #0 << 2
463            int argCount = l >> 2;
464            if (argCount > 0) {
465                int regArgCount = argCount > 4 ? 4 : argCount;
466                o4(0xE8BD0000 | ((1 << regArgCount) - 1)); // ldmfd   sp!,{}
467            }
468        }
469
470        virtual void storeEAToArg(int l) {
471            fprintf(stderr, "storeEAToArg(%d);\n", l);
472            if (l < 0 || l > 4096-4) {
473                error("l out of range for stack offset: 0x%08x", l);
474            }
475            o4(0xE58D0000 + l); // str r0, [sp, #4]
476        }
477
478        virtual int callForward(int symbol) {
479            fprintf(stderr, "callForward(%d);\n", symbol);
480            // Forward calls are always short (local)
481            return o4(0xEB000000 | encodeAddress(symbol));
482        }
483
484        virtual void callRelative(int t) {
485            fprintf(stderr, "callRelative(%d);\n", t);
486            int abs = t + getPC() + jumpOffset();
487            fprintf(stderr, "abs=%d (0x08%x)\n", abs, abs);
488            if (t >= - (1 << 25) && t < (1 << 25)) {
489                o4(0xEB000000 | encodeAddress(t));
490            } else {
491                // Long call.
492                o4(0xE59FC000); //         ldr    r12, .L1
493                o4(0xEA000000); //         b .L99
494                o4(t - 16);     // .L1:    .word 0
495                o4(0xE08CC00F); // .L99:   add r12,pc
496                o4(0xE12FFF3C); //         blx r12
497           }
498        }
499
500        virtual void callIndirect(int l) {
501            fprintf(stderr, "callIndirect(%d);\n", l);
502            oad(0x2494ff, l); /* call *xxx(%esp) */
503        }
504
505        virtual void adjustStackAfterCall(int l) {
506            fprintf(stderr, "adjustStackAfterCall(%d);\n", l);
507            if (l < 0 || l > 0x3FC) {
508                error("L out of range for stack adjustment: 0x%08x", l);
509            }
510            int argCount = l >> 2;
511            if (argCount > 4) {
512                int remainingArgs = argCount - 4;
513                o4(0xE28DDF00 | remainingArgs); // add    sp, sp, #0x3fc
514            }
515
516        }
517
518        virtual int jumpOffset() {
519            return 4;
520        }
521
522        /* output a symbol and patch all calls to it */
523        virtual void gsym(int t) {
524            fprintf(stderr, "gsym(0x%x)\n", t);
525            int n;
526            int base = getBase();
527            int pc = getPC();
528            fprintf(stderr, "pc = 0x%x\n", pc);
529            while (t) {
530                int data = * (int*) t;
531                int decodedOffset = ((BRANCH_REL_ADDRESS_MASK & data) << 2);
532                if (decodedOffset == 0) {
533                    n = 0;
534                } else {
535                    n = base + decodedOffset; /* next value */
536                }
537                *(int *) t = (data & ~BRANCH_REL_ADDRESS_MASK)
538                    | encodeRelAddress(pc - t - 8);
539                t = n;
540            }
541        }
542
543        virtual int disassemble(FILE* out) {
544               disasmOut = out;
545            disasm_interface_t  di;
546            di.di_readword = disassemble_readword;
547            di.di_printaddr = disassemble_printaddr;
548            di.di_printf = disassemble_printf;
549
550            int base = getBase();
551            int pc = getPC();
552            for(int i = base; i < pc; i += 4) {
553                fprintf(out, "%08x: %08x  ", i, *(int*) i);
554                ::disasm(&di, i, 0);
555            }
556            return 0;
557        }
558    private:
559        static FILE* disasmOut;
560
561        static u_int
562        disassemble_readword(u_int address)
563        {
564            return(*((u_int *)address));
565        }
566
567        static void
568        disassemble_printaddr(u_int address)
569        {
570            fprintf(disasmOut, "0x%08x", address);
571        }
572
573        static void
574        disassemble_printf(const char *fmt, ...) {
575            va_list ap;
576            va_start(ap, fmt);
577            vfprintf(disasmOut, fmt, ap);
578            va_end(ap);
579        }
580
581        static const int BRANCH_REL_ADDRESS_MASK = 0x00ffffff;
582
583        /** Encode a relative address that might also be
584         * a label.
585         */
586        int encodeAddress(int value) {
587            int base = getBase();
588            if (value >= base && value <= getPC() ) {
589                // This is a label, encode it relative to the base.
590                value = value - base;
591            }
592            return encodeRelAddress(value);
593        }
594
595        int encodeRelAddress(int value) {
596            return BRANCH_REL_ADDRESS_MASK & (value >> 2);
597        }
598
599        void error(const char* fmt,...) {
600            va_list ap;
601            va_start(ap, fmt);
602            vfprintf(stderr, fmt, ap);
603            va_end(ap);
604            exit(12);
605        }
606    };
607
608    class X86CodeGenerator : public CodeGenerator {
609    public:
610        X86CodeGenerator() {}
611        virtual ~X86CodeGenerator() {}
612
613        /* returns address to patch with local variable size
614        */
615        virtual int functionEntry(int argCount) {
616            o(0xe58955); /* push   %ebp, mov %esp, %ebp */
617            return oad(0xec81, 0); /* sub $xxx, %esp */
618        }
619
620        virtual void functionExit(int argCount, int localVariableAddress, int localVariableSize) {
621            o(0xc3c9); /* leave, ret */
622            *(int *) localVariableAddress = localVariableSize; /* save local variables */
623        }
624
625        /* load immediate value */
626        virtual void li(int t) {
627            oad(0xb8, t); /* mov $xx, %eax */
628        }
629
630        virtual int gjmp(int t) {
631            return psym(0xe9, t);
632        }
633
634        /* l = 0: je, l == 1: jne */
635        virtual int gtst(bool l, int t) {
636            o(0x0fc085); /* test %eax, %eax, je/jne xxx */
637            return psym(0x84 + l, t);
638        }
639
640        virtual void gcmp(int op) {
641            int t = decodeOp(op);
642            o(0xc139); /* cmp %eax,%ecx */
643            li(0);
644            o(0x0f); /* setxx %al */
645            o(t + 0x90);
646            o(0xc0);
647        }
648
649        virtual void genOp(int op) {
650            o(decodeOp(op));
651            if (op == OP_MOD)
652                o(0x92); /* xchg %edx, %eax */
653        }
654
655        virtual void clearECX() {
656            oad(0xb9, 0); /* movl $0, %ecx */
657        }
658
659        virtual void pushEAX() {
660            o(0x50); /* push %eax */
661        }
662
663        virtual void popECX() {
664            o(0x59); /* pop %ecx */
665        }
666
667        virtual void storeEAXToAddressECX(bool isInt) {
668            o(0x0188 + isInt); /* movl %eax/%al, (%ecx) */
669        }
670
671        virtual void loadEAXIndirect(bool isInt) {
672            if (isInt)
673                o(0x8b); /* mov (%eax), %eax */
674            else
675                o(0xbe0f); /* movsbl (%eax), %eax */
676            ob(0); /* add zero in code */
677        }
678
679        virtual void leaEAX(int ea) {
680            gmov(10, ea); /* leal EA, %eax */
681        }
682
683        virtual void storeEAX(int ea) {
684            gmov(6, ea); /* mov %eax, EA */
685        }
686
687        virtual void loadEAX(int ea) {
688            gmov(8, ea); /* mov EA, %eax */
689        }
690
691        virtual void postIncrementOrDecrement(int n, int op) {
692            /* Implement post-increment or post decrement.
693             */
694            gmov(0, n); /* 83 ADD */
695            o(decodeOp(op));
696        }
697
698        virtual int beginFunctionCallArguments() {
699            return oad(0xec81, 0); /* sub $xxx, %esp */
700        }
701
702        virtual void endFunctionCallArguments(int a, int l) {
703            * (int*) a = l;
704        }
705
706        virtual void storeEAToArg(int l) {
707            oad(0x248489, l); /* movl %eax, xxx(%esp) */
708        }
709
710        virtual int callForward(int symbol) {
711            return psym(0xe8, symbol); /* call xxx */
712        }
713
714        virtual void callRelative(int t) {
715            psym(0xe8, t); /* call xxx */
716        }
717
718        virtual void callIndirect(int l) {
719            oad(0x2494ff, l); /* call *xxx(%esp) */
720        }
721
722        virtual void adjustStackAfterCall(int l) {
723            oad(0xc481, l); /* add $xxx, %esp */
724        }
725
726        virtual int jumpOffset() {
727            return 5;
728        }
729
730        virtual int disassemble(FILE* out) {
731            return 1;
732        }
733
734    private:
735        static const int operatorHelper[];
736
737        int decodeOp(int op) {
738            if (op < 0 || op > OP_COUNT) {
739                fprintf(stderr, "Out-of-range operator: %d\n", op);
740                exit(1);
741            }
742            return operatorHelper[op];
743        }
744
745        void gmov(int l, int t) {
746            o(l + 0x83);
747            oad((t < LOCAL) << 7 | 5, t);
748        }
749    };
750
751    /* vars: value of variables
752     loc : local variable index
753     glo : global variable index
754     ind : output code ptr
755     rsym: return symbol
756     prog: output code
757     dstk: define stack
758     dptr, dch: macro state
759     */
760    int tok, tokc, tokl, ch, vars, rsym, loc, glo, sym_stk, dstk,
761            dptr, dch, last_id;
762    void* pSymbolBase;
763    void* pGlobalBase;
764    void* pVarsBase;
765    FILE* file;
766
767    CodeBuf codeBuf;
768    CodeGenerator* pGen;
769
770    static const int ALLOC_SIZE = 99999;
771
772    /* depends on the init string */
773    static const int TOK_STR_SIZE = 48;
774    static const int TOK_IDENT = 0x100;
775    static const int TOK_INT = 0x100;
776    static const int TOK_IF = 0x120;
777    static const int TOK_ELSE = 0x138;
778    static const int TOK_WHILE = 0x160;
779    static const int TOK_BREAK = 0x190;
780    static const int TOK_RETURN = 0x1c0;
781    static const int TOK_FOR = 0x1f8;
782    static const int TOK_DEFINE = 0x218;
783    static const int TOK_MAIN = 0x250;
784
785    static const int TOK_DUMMY = 1;
786    static const int TOK_NUM = 2;
787
788    static const int LOCAL = 0x200;
789
790    static const int SYM_FORWARD = 0;
791    static const int SYM_DEFINE = 1;
792
793    /* tokens in string heap */
794    static const int TAG_TOK = ' ';
795    static const int TAG_MACRO = 2;
796
797    static const int OP_INCREMENT = 0;
798    static const int OP_DECREMENT = 1;
799    static const int OP_MUL = 2;
800    static const int OP_DIV = 3;
801    static const int OP_MOD = 4;
802    static const int OP_PLUS = 5;
803    static const int OP_MINUS = 6;
804    static const int OP_SHIFT_LEFT = 7;
805    static const int OP_SHIFT_RIGHT = 8;
806    static const int OP_LESS_EQUAL = 9;
807    static const int OP_GREATER_EQUAL = 10;
808    static const int OP_LESS = 11;
809    static const int OP_GREATER = 12;
810    static const int OP_EQUALS = 13;
811    static const int OP_NOT_EQUALS = 14;
812    static const int OP_LOGICAL_AND = 15;
813    static const int OP_LOGICAL_OR = 16;
814    static const int OP_BIT_AND = 17;
815    static const int OP_BIT_XOR = 18;
816    static const int OP_BIT_OR = 19;
817    static const int OP_BIT_NOT = 20;
818    static const int OP_LOGICAL_NOT = 21;
819    static const int OP_COUNT = 22;
820
821    /* Operators are searched from front, the two-character operators appear
822     * before the single-character operators with the same first character.
823     * @ is used to pad out single-character operators.
824     */
825    static const char* operatorChars;
826    static const char operatorLevel[];
827
828    void pdef(int t) {
829        *(char *) dstk++ = t;
830    }
831
832    void inp() {
833        if (dptr) {
834            ch = *(char *) dptr++;
835            if (ch == TAG_MACRO) {
836                dptr = 0;
837                ch = dch;
838            }
839        } else
840            ch = fgetc(file);
841        /*    printf("ch=%c 0x%x\n", ch, ch); */
842    }
843
844    int isid() {
845        return isalnum(ch) | (ch == '_');
846    }
847
848    /* read a character constant */
849    void getq() {
850        if (ch == '\\') {
851            inp();
852            if (ch == 'n')
853                ch = '\n';
854        }
855    }
856
857    void next() {
858        int l, a;
859
860        while (isspace(ch) | (ch == '#')) {
861            if (ch == '#') {
862                inp();
863                next();
864                if (tok == TOK_DEFINE) {
865                    next();
866                    pdef(TAG_TOK); /* fill last ident tag */
867                    *(int *) tok = SYM_DEFINE;
868                    *(int *) (tok + 4) = dstk; /* define stack */
869                }
870                /* well we always save the values ! */
871                while (ch != '\n') {
872                    pdef(ch);
873                    inp();
874                }
875                pdef(ch);
876                pdef(TAG_MACRO);
877            }
878            inp();
879        }
880        tokl = 0;
881        tok = ch;
882        /* encode identifiers & numbers */
883        if (isid()) {
884            pdef(TAG_TOK);
885            last_id = dstk;
886            while (isid()) {
887                pdef(ch);
888                inp();
889            }
890            if (isdigit(tok)) {
891                tokc = strtol((char*) last_id, 0, 0);
892                tok = TOK_NUM;
893            } else {
894                *(char *) dstk = TAG_TOK; /* no need to mark end of string (we
895                 suppose data is initialized to zero by calloc) */
896                tok = (int) (strstr((char*) sym_stk, (char*) (last_id - 1))
897                        - sym_stk);
898                *(char *) dstk = 0; /* mark real end of ident for dlsym() */
899                tok = tok * 8 + TOK_IDENT;
900                if (tok > TOK_DEFINE) {
901                    tok = vars + tok;
902                    /*        printf("tok=%s %x\n", last_id, tok); */
903                    /* define handling */
904                    if (*(int *) tok == SYM_DEFINE) {
905                        dptr = *(int *) (tok + 4);
906                        dch = ch;
907                        inp();
908                        next();
909                    }
910                }
911            }
912        } else {
913            inp();
914            if (tok == '\'') {
915                tok = TOK_NUM;
916                getq();
917                tokc = ch;
918                inp();
919                inp();
920            } else if ((tok == '/') & (ch == '*')) {
921                inp();
922                while (ch) {
923                    while (ch != '*')
924                        inp();
925                    inp();
926                    if (ch == '/')
927                        ch = 0;
928                }
929                inp();
930                next();
931            } else {
932                const char* t = operatorChars;
933                int opIndex = 0;
934                while ((l = *t++) != 0) {
935                    a = *t++;
936                    tokl = operatorLevel[opIndex];
937                    tokc = opIndex;
938                    if ((l == tok) & ((a == ch) | (a == '@'))) {
939#if 0
940                        printf("%c%c -> tokl=%d tokc=0x%x\n",
941                                l, a, tokl, tokc);
942#endif
943                        if (a == ch) {
944                            inp();
945                            tok = TOK_DUMMY; /* dummy token for double tokens */
946                        }
947                        break;
948                    }
949                    opIndex++;
950                }
951                if (l == 0) {
952                    tokl = 0;
953                    tokc = 0;
954                }
955            }
956        }
957#if 0
958        {
959            int p;
960
961            printf("tok=0x%x ", tok);
962            if (tok >= TOK_IDENT) {
963                printf("'");
964                if (tok> TOK_DEFINE)
965                p = sym_stk + 1 + (tok - vars - TOK_IDENT) / 8;
966                else
967                p = sym_stk + 1 + (tok - TOK_IDENT) / 8;
968                while (*(char *)p != TAG_TOK && *(char *)p)
969                printf("%c", *(char *)p++);
970                printf("'\n");
971            } else if (tok == TOK_NUM) {
972                printf("%d\n", tokc);
973            } else {
974                printf("'%c'\n", tok);
975            }
976        }
977#endif
978    }
979
980    void error(const char *fmt, ...) {
981        va_list ap;
982
983        va_start(ap, fmt);
984        fprintf(stderr, "%ld: ", ftell((FILE *) file));
985        vfprintf(stderr, fmt, ap);
986        fprintf(stderr, "\n");
987        va_end(ap);
988        exit(1);
989    }
990
991    void skip(int c) {
992        if (tok != c) {
993            error("'%c' expected", c);
994        }
995        next();
996    }
997
998    /* l is one if '=' parsing wanted (quick hack) */
999    void unary(int l) {
1000        int n, t, a, c;
1001        t = 0;
1002        n = 1; /* type of expression 0 = forward, 1 = value, other =
1003         lvalue */
1004        if (tok == '\"') {
1005            pGen->li(glo);
1006            while (ch != '\"') {
1007                getq();
1008                *(char *) glo++ = ch;
1009                inp();
1010            }
1011            *(char *) glo = 0;
1012            glo = (glo + 4) & -4; /* align heap */
1013            inp();
1014            next();
1015        } else {
1016            c = tokl;
1017            a = tokc;
1018            t = tok;
1019            next();
1020            if (t == TOK_NUM) {
1021                pGen->li(a);
1022            } else if (c == 2) {
1023                /* -, +, !, ~ */
1024                unary(0);
1025                pGen->clearECX();
1026                if (t == '!')
1027                    pGen->gcmp(a);
1028                else
1029                    pGen->genOp(a);
1030            } else if (t == '(') {
1031                expr();
1032                skip(')');
1033            } else if (t == '*') {
1034                /* parse cast */
1035                skip('(');
1036                t = tok; /* get type */
1037                next(); /* skip int/char/void */
1038                next(); /* skip '*' or '(' */
1039                if (tok == '*') {
1040                    /* function type */
1041                    skip('*');
1042                    skip(')');
1043                    skip('(');
1044                    skip(')');
1045                    t = 0;
1046                }
1047                skip(')');
1048                unary(0);
1049                if (tok == '=') {
1050                    next();
1051                    pGen->pushEAX();
1052                    expr();
1053                    pGen->popECX();
1054                    pGen->storeEAXToAddressECX(t == TOK_INT);
1055                } else if (t) {
1056                    pGen->loadEAXIndirect(t == TOK_INT);
1057                }
1058            } else if (t == '&') {
1059                pGen->leaEAX(*(int *) tok);
1060                next();
1061            } else {
1062                n = *(int *) t;
1063                /* forward reference: try dlsym */
1064                if (!n) {
1065                    n = (int) dlsym(RTLD_DEFAULT, (char*) last_id);
1066                }
1067                if ((tok == '=') & l) {
1068                    /* assignment */
1069                    next();
1070                    expr();
1071                    pGen->storeEAX(n);
1072                } else if (tok != '(') {
1073                    /* variable */
1074                    pGen->loadEAX(n);
1075                    if (tokl == 11) {
1076                        pGen->postIncrementOrDecrement(n, tokc);
1077                        next();
1078                    }
1079                }
1080            }
1081        }
1082
1083        /* function call */
1084        if (tok == '(') {
1085            if (n == 1)
1086                pGen->pushEAX();
1087
1088            /* push args and invert order */
1089            a = pGen->beginFunctionCallArguments();
1090            next();
1091            l = 0;
1092            while (tok != ')') {
1093                expr();
1094                pGen->storeEAToArg(l);
1095                if (tok == ',')
1096                    next();
1097                l = l + 4;
1098            }
1099            pGen->endFunctionCallArguments(a, l);
1100            next();
1101            if (!n) {
1102                /* forward reference */
1103                t = t + 4;
1104                *(int *) t = pGen->callForward(*(int *) t);
1105            } else if (n == 1) {
1106                pGen->callIndirect(l);
1107                l = l + 4;
1108            } else {
1109                pGen->callRelative(n - codeBuf.getPC() - pGen->jumpOffset()); /* call xxx */
1110            }
1111            if (l)
1112                pGen->adjustStackAfterCall(l);
1113        }
1114    }
1115
1116    void sum(int l) {
1117        int t, n, a;
1118        t = 0;
1119        if (l-- == 1)
1120            unary(1);
1121        else {
1122            sum(l);
1123            a = 0;
1124            while (l == tokl) {
1125                n = tok;
1126                t = tokc;
1127                next();
1128
1129                if (l > 8) {
1130                    a = pGen->gtst(t == OP_LOGICAL_OR, a); /* && and || output code generation */
1131                    sum(l);
1132                } else {
1133                    pGen->pushEAX();
1134                    sum(l);
1135                    pGen->popECX();
1136
1137                    if ((l == 4) | (l == 5)) {
1138                        pGen->gcmp(t);
1139                    } else {
1140                        pGen->genOp(t);
1141                    }
1142                }
1143            }
1144            /* && and || output code generation */
1145            if (a && l > 8) {
1146                a = pGen->gtst(t == OP_LOGICAL_OR, a);
1147                pGen->li(t != OP_LOGICAL_OR);
1148                pGen->gjmp(5); /* jmp $ + 5 (sizeof li, FIXME for ARM) */
1149                pGen->gsym(a);
1150                pGen->li(t == OP_LOGICAL_OR);
1151            }
1152        }
1153    }
1154
1155    void expr() {
1156        sum(11);
1157    }
1158
1159    int test_expr() {
1160        expr();
1161        return pGen->gtst(0, 0);
1162    }
1163
1164    void block(int l) {
1165        int a, n, t;
1166
1167        if (tok == TOK_IF) {
1168            next();
1169            skip('(');
1170            a = test_expr();
1171            skip(')');
1172            block(l);
1173            if (tok == TOK_ELSE) {
1174                next();
1175                n = pGen->gjmp(0); /* jmp */
1176                pGen->gsym(a);
1177                block(l);
1178                pGen->gsym(n); /* patch else jmp */
1179            } else {
1180                pGen->gsym(a); /* patch if test */
1181            }
1182        } else if ((tok == TOK_WHILE) | (tok == TOK_FOR)) {
1183            t = tok;
1184            next();
1185            skip('(');
1186            if (t == TOK_WHILE) {
1187                n = codeBuf.getPC(); // top of loop, target of "next" iteration
1188                a = test_expr();
1189            } else {
1190                if (tok != ';')
1191                    expr();
1192                skip(';');
1193                n = codeBuf.getPC();
1194                a = 0;
1195                if (tok != ';')
1196                    a = test_expr();
1197                skip(';');
1198                if (tok != ')') {
1199                    t = pGen->gjmp(0);
1200                    expr();
1201                    pGen->gjmp(n - codeBuf.getPC() - pGen->jumpOffset());
1202                    pGen->gsym(t);
1203                    n = t + 4;
1204                }
1205            }
1206            skip(')');
1207            block((int) &a);
1208            pGen->gjmp(n - codeBuf.getPC() - pGen->jumpOffset()); /* jmp */
1209            pGen->gsym(a);
1210        } else if (tok == '{') {
1211            next();
1212            /* declarations */
1213            decl(1);
1214            while (tok != '}')
1215                block(l);
1216            next();
1217        } else {
1218            if (tok == TOK_RETURN) {
1219                next();
1220                if (tok != ';')
1221                    expr();
1222                rsym = pGen->gjmp(rsym); /* jmp */
1223            } else if (tok == TOK_BREAK) {
1224                next();
1225                *(int *) l = pGen->gjmp(*(int *) l);
1226            } else if (tok != ';')
1227                expr();
1228            skip(';');
1229        }
1230    }
1231
1232    /* 'l' is true if local declarations */
1233    void decl(int l) {
1234        int a;
1235
1236        while ((tok == TOK_INT) | ((tok != -1) & (!l))) {
1237            if (tok == TOK_INT) {
1238                next();
1239                while (tok != ';') {
1240                    if (l) {
1241                        loc = loc + 4;
1242                        *(int *) tok = -loc;
1243                    } else {
1244                        *(int *) tok = glo;
1245                        glo = glo + 4;
1246                    }
1247                    next();
1248                    if (tok == ',')
1249                        next();
1250                }
1251                skip(';');
1252            } else {
1253                /* patch forward references (XXX: do not work for function
1254                 pointers) */
1255                pGen->gsym(*(int *) (tok + 4));
1256                /* put function address */
1257                *(int *) tok = codeBuf.getPC();
1258                next();
1259                skip('(');
1260                a = 8;
1261                int argCount = 0;
1262                while (tok != ')') {
1263                    /* read param name and compute offset */
1264                    *(int *) tok = a;
1265                    a = a + 4;
1266                    next();
1267                    if (tok == ',')
1268                        next();
1269                    argCount++;
1270                }
1271                next(); /* skip ')' */
1272                rsym = loc = 0;
1273                a = pGen->functionEntry(argCount);
1274                block(0);
1275                pGen->gsym(rsym);
1276                pGen->functionExit(argCount, a, loc);
1277            }
1278        }
1279    }
1280
1281    void cleanup() {
1282        if (sym_stk != 0) {
1283            free((void*) sym_stk);
1284            sym_stk = 0;
1285        }
1286        if (pGlobalBase != 0) {
1287            free((void*) pGlobalBase);
1288            pGlobalBase = 0;
1289        }
1290        if (pVarsBase != 0) {
1291            free(pVarsBase);
1292            pVarsBase = 0;
1293        }
1294        if (pGen) {
1295            delete pGen;
1296            pGen = 0;
1297        }
1298    }
1299
1300    void clear() {
1301        tok = 0;
1302        tokc = 0;
1303        tokl = 0;
1304        ch = 0;
1305        vars = 0;
1306        rsym = 0;
1307        loc = 0;
1308        glo = 0;
1309        sym_stk = 0;
1310        dstk = 0;
1311        dptr = 0;
1312        dch = 0;
1313        last_id = 0;
1314        file = 0;
1315        pGlobalBase = 0;
1316        pVarsBase = 0;
1317        pGen = 0;
1318    }
1319
1320    void setArchitecture(const char* architecture) {
1321        delete pGen;
1322        pGen = 0;
1323
1324        if (architecture != NULL) {
1325            if (strcmp(architecture, "arm") == 0) {
1326                pGen = new ARMCodeGenerator();
1327            } else if (strcmp(architecture, "x86") == 0) {
1328                pGen = new X86CodeGenerator();
1329            } else {
1330                fprintf(stderr, "Unknown architecture %s", architecture);
1331            }
1332        }
1333
1334        if (pGen == NULL) {
1335            pGen = new ARMCodeGenerator();
1336        }
1337    }
1338
1339public:
/* Options passed to compile(). */
struct args {
    /* Target architecture name; 0 until the caller sets it. */
    const char* architecture;

    args() : architecture(0) {
    }
};
1346
1347    compiler() {
1348        clear();
1349    }
1350
1351    ~compiler() {
1352        cleanup();
1353    }
1354
1355    int compile(FILE* in, args& args) {
1356        cleanup();
1357        clear();
1358        codeBuf.init(ALLOC_SIZE);
1359        setArchitecture(args.architecture);
1360        pGen->init(&codeBuf);
1361        file = in;
1362        sym_stk = (int) calloc(1, ALLOC_SIZE);
1363        dstk = (int) strcpy((char*) sym_stk,
1364                " int if else while break return for define main ")
1365                + TOK_STR_SIZE;
1366        pGlobalBase = calloc(1, ALLOC_SIZE);
1367        glo = (int) pGlobalBase;
1368        pVarsBase = calloc(1, ALLOC_SIZE);
1369        vars = (int) pVarsBase;
1370        inp();
1371        next();
1372        decl(0);
1373        pGen->finishCompile();
1374        return 0;
1375    }
1376
1377    int run(int argc, char** argv) {
1378        typedef int (*mainPtr)(int argc, char** argv);
1379        mainPtr aMain = (mainPtr) *(int*) (vars + TOK_MAIN);
1380        if (!aMain) {
1381            fprintf(stderr, "Could not find function \"main\".\n");
1382            return -1;
1383        }
1384        return aMain(argc, argv);
1385    }
1386
1387    int dump(FILE* out) {
1388        fwrite(codeBuf.getBase(), 1, codeBuf.getSize(), out);
1389        return 0;
1390    }
1391
1392    int disassemble(FILE* out) {
1393        return pGen->disassemble(out);
1394    }
1395
1396};
1397
1398const char* compiler::operatorChars =
1399    "++--*@/@%@+@-@<<>><=>=<@>@==!=&&||&@^@|@~@!@";
1400
1401const char compiler::operatorLevel[] =
1402    {11, 11, 1, 1, 1, 2, 2, 3, 3, 4, 4, 4, 4,
1403            5, 5, /* ==, != */
1404            9, 10, /* &&, || */
1405            6, 7, 8, /* & ^ | */
1406            2, 2 /* ~ ! */
1407            };
1408
1409FILE* compiler::ARMCodeGenerator::disasmOut;
1410
1411const int compiler::X86CodeGenerator::operatorHelper[] = {
1412        0x1,     // ++
1413        0xff,    // --
1414        0xc1af0f, // *
1415        0xf9f79991, // /
1416        0xf9f79991, // % (With manual assist to swap results)
1417        0xc801, // +
1418        0xd8f7c829, // -
1419        0xe0d391, // <<
1420        0xf8d391, // >>
1421        0xe, // <=
1422        0xd, // >=
1423        0xc, // <
1424        0xf, // >
1425        0x4, // ==
1426        0x5, // !=
1427        0x0, // &&
1428        0x1, // ||
1429        0xc821, // &
1430        0xc831, // ^
1431        0xc809, // |
1432        0xd0f7, // ~
1433        0x4     // !
1434};
1435
1436} // namespace acc
1437
1438// This is a separate function so it can easily be set by breakpoint in gdb.
1439int run(acc::compiler& c, int argc, char** argv) {
1440    return c.run(argc, argv);
1441}
1442
1443int main(int argc, char** argv) {
1444    bool doDump = false;
1445    bool doDisassemble = false;
1446    const char* inFile = NULL;
1447    const char* outFile = NULL;
1448    const char* architecture = "arm";
1449    int i;
1450    for (i = 1; i < argc; i++) {
1451        char* arg = argv[i];
1452        if (arg[0] == '-') {
1453            switch (arg[1]) {
1454            case 'a':
1455                if (i + 1 >= argc) {
1456                    fprintf(stderr, "Expected architecture after -a\n");
1457                    return 2;
1458                }
1459                architecture = argv[i+1];
1460                i += 1;
1461                break;
1462            case 'd':
1463                if (i + 1 >= argc) {
1464                    fprintf(stderr, "Expected filename after -d\n");
1465                    return 2;
1466                }
1467                doDump = true;
1468                outFile = argv[i + 1];
1469                i += 1;
1470                break;
1471            case 'S':
1472                doDisassemble = true;
1473                break;
1474            default:
1475                fprintf(stderr, "Unrecognized flag %s\n", arg);
1476                return 3;
1477            }
1478        } else if (inFile == NULL) {
1479            inFile = arg;
1480        } else {
1481            break;
1482        }
1483    }
1484
1485    FILE* in = stdin;
1486    if (inFile) {
1487        in = fopen(inFile, "r");
1488        if (!in) {
1489            fprintf(stderr, "Could not open input file %s\n", inFile);
1490            return 1;
1491        }
1492    }
1493    acc::compiler compiler;
1494    acc::compiler::args args;
1495    args.architecture = architecture;
1496    int compileResult = compiler.compile(in, args);
1497    if (in != stdin) {
1498        fclose(in);
1499    }
1500    if (compileResult) {
1501        fprintf(stderr, "Compile failed: %d\n", compileResult);
1502        return 6;
1503    }
1504    if (doDisassemble) {
1505        compiler.disassemble(stderr);
1506    }
1507    if (doDump) {
1508        FILE* save = fopen(outFile, "w");
1509        if (!save) {
1510            fprintf(stderr, "Could not open output file %s\n", outFile);
1511            return 5;
1512        }
1513        compiler.dump(save);
1514        fclose(save);
1515    } else {
1516        fprintf(stderr, "Executing compiled code:\n");
1517        int codeArgc = argc - i + 1;
1518        char** codeArgv = argv + i - 1;
1519        codeArgv[0] = (char*) (inFile ? inFile : "stdin");
1520        int result = run(compiler, codeArgc, codeArgv);
1521        fprintf(stderr, "result: %d\n", result);
1522        return result;
1523    }
1524
1525    return 0;
1526}
1527