acc.cpp revision 8df4619e09b8944c1aa3b743b556a63a283d8768
1/*
2 * Android "Almost" C Compiler.
3 * This is a compiler for a small subset of the C language, intended for use
4 * in scripting environments where speed and memory footprint are important.
5 *
6 * This code is based upon the "unobfuscated" version of the
7 * Obfuscated Tiny C compiler, see the file LICENSE for details.
8 *
9 */
10
11#include <ctype.h>
12#include <dlfcn.h>
13#include <errno.h>
14#include <stdarg.h>
15#include <stdint.h>
16#include <stdio.h>
17#include <stdlib.h>
18#include <string.h>
19#include <cutils/hashmap.h>
20
21#if defined(__i386__)
22#include <sys/mman.h>
23#endif
24
25#if defined(__arm__)
26#include <unistd.h>
27#endif
28
29#if defined(__arm__)
30#define DEFAULT_ARM_CODEGEN
31#define PROVIDE_ARM_CODEGEN
32#elif defined(__i386__)
33#define DEFAULT_X86_CODEGEN
34#define PROVIDE_X86_CODEGEN
35#elif defined(__x86_64__)
36#define DEFAULT_X64_CODEGEN
37#define PROVIDE_X64_CODEGEN
38#endif
39
40
41#ifdef PROVIDE_ARM_CODEGEN
42#include "disassem.h"
43#endif
44
45#include <acc/acc.h>
46
47#define LOG_API(...) do {} while(0)
48// #define LOG_API(...) fprintf (stderr, __VA_ARGS__)
49
50#define LOG_STACK(...) do {} while(0)
51// #define LOG_STACK(...) fprintf (stderr, __VA_ARGS__)
52
53// #define ENABLE_ARM_DISASSEMBLY
54// #define PROVIDE_TRACE_CODEGEN
55
56namespace acc {
57
58// Subset of STL vector.
59template<class E> class Vector {
60    public:
61    Vector() {
62        mpBase = 0;
63        mUsed = 0;
64        mSize = 0;
65    }
66
67    ~Vector() {
68        if (mpBase) {
69            for(size_t i = 0; i < mUsed; i++)  {
70                mpBase[mUsed].~E();
71            }
72            free(mpBase);
73        }
74    }
75
76    inline E& operator[](size_t i) {
77        return mpBase[i];
78    }
79
80    inline E& front() {
81        return mpBase[0];
82    }
83
84    inline E& back() {
85        return mpBase[mUsed - 1];
86    }
87
88    void pop_back() {
89        mUsed -= 1;
90        mpBase[mUsed].~E();
91    }
92
93    void push_back(const E& item) {
94        * ensure(1) = item;
95    }
96
97    size_t size() {
98        return mUsed;
99    }
100
101private:
102    E* ensure(int n) {
103        size_t newUsed = mUsed + n;
104        if (newUsed > mSize) {
105            size_t newSize = mSize * 2 + 10;
106            if (newSize < newUsed) {
107                newSize = newUsed;
108            }
109            mpBase = (E*) realloc(mpBase, sizeof(E) * newSize);
110            mSize = newSize;
111        }
112        E* result = mpBase + mUsed;
113        mUsed = newUsed;
114        return result;
115    }
116
117    E* mpBase;
118    size_t mUsed;
119    size_t mSize;
120};
121
122class ErrorSink {
123public:
124    void error(const char *fmt, ...) {
125        va_list ap;
126        va_start(ap, fmt);
127        verror(fmt, ap);
128        va_end(ap);
129    }
130
131    virtual void verror(const char* fmt, va_list ap) = 0;
132};
133
134class Compiler : public ErrorSink {
135    typedef int tokenid_t;
136    enum TypeTag {
137        TY_INT, TY_CHAR, TY_VOID, TY_FLOAT, TY_DOUBLE,
138        TY_POINTER, TY_FUNC, TY_PARAM
139    };
140
141    struct Type {
142        TypeTag tag;
143        tokenid_t id; // For function arguments
144        Type* pHead;
145        Type* pTail;
146    };
147
148    class CodeBuf {
149        char* ind; // Output code pointer
150        char* pProgramBase;
151        ErrorSink* mErrorSink;
152        int mSize;
153        bool mOverflowed;
154
155        void release() {
156            if (pProgramBase != 0) {
157                free(pProgramBase);
158                pProgramBase = 0;
159            }
160        }
161
162        bool check(int n) {
163            int newSize = ind - pProgramBase + n;
164            bool overflow = newSize > mSize;
165            if (overflow && !mOverflowed) {
166                mOverflowed = true;
167                if (mErrorSink) {
168                    mErrorSink->error("Code too large: %d bytes", newSize);
169                }
170            }
171            return overflow;
172        }
173
174    public:
175        CodeBuf() {
176            pProgramBase = 0;
177            ind = 0;
178            mErrorSink = 0;
179            mSize = 0;
180            mOverflowed = false;
181        }
182
183        ~CodeBuf() {
184            release();
185        }
186
187        void init(int size) {
188            release();
189            mSize = size;
190            pProgramBase = (char*) calloc(1, size);
191            ind = pProgramBase;
192        }
193
194        void setErrorSink(ErrorSink* pErrorSink) {
195            mErrorSink = pErrorSink;
196        }
197
198        int o4(int n) {
199            if(check(4)) {
200                return 0;
201            }
202            intptr_t result = (intptr_t) ind;
203            * (int*) ind = n;
204            ind += 4;
205            return result;
206        }
207
208        /*
209         * Output a byte. Handles all values, 0..ff.
210         */
211        void ob(int n) {
212            if(check(1)) {
213                return;
214            }
215            *ind++ = n;
216        }
217
218        inline void* getBase() {
219            return (void*) pProgramBase;
220        }
221
222        intptr_t getSize() {
223            return ind - pProgramBase;
224        }
225
226        intptr_t getPC() {
227            return (intptr_t) ind;
228        }
229    };
230
231    /**
232     * A code generator creates an in-memory program, generating the code on
233     * the fly. There is one code generator implementation for each supported
234     * architecture.
235     *
236     * The code generator implements the following abstract machine:
237     * R0 - the accumulator.
238     * FP - a frame pointer for accessing function arguments and local
239     *      variables.
240     * SP - a stack pointer for storing intermediate results while evaluating
241     *      expressions. The stack pointer grows downwards.
242     *
243     * The function calling convention is that all arguments are placed on the
244     * stack such that the first argument has the lowest address.
245     * After the call, the result is in R0. The caller is responsible for
246     * removing the arguments from the stack.
247     * The R0 register is not saved across function calls. The
248     * FP and SP registers are saved.
249     */
250
251    class CodeGenerator {
252    public:
253        CodeGenerator() {
254            mErrorSink = 0;
255            pCodeBuf = 0;
256            pushType();
257        }
258        virtual ~CodeGenerator() {}
259
260        virtual void init(CodeBuf* pCodeBuf) {
261            this->pCodeBuf = pCodeBuf;
262            pCodeBuf->setErrorSink(mErrorSink);
263        }
264
265        virtual void setErrorSink(ErrorSink* pErrorSink) {
266            mErrorSink = pErrorSink;
267            if (pCodeBuf) {
268                pCodeBuf->setErrorSink(mErrorSink);
269            }
270        }
271
272        /* Emit a function prolog.
273         * argCount is the number of arguments.
274         * Save the old value of the FP.
275         * Set the new value of the FP.
276         * Convert from the native platform calling convention to
277         * our stack-based calling convention. This may require
278         * pushing arguments from registers to the stack.
279         * Allocate "N" bytes of stack space. N isn't known yet, so
280         * just emit the instructions for adjusting the stack, and return
281         * the address to patch up. The patching will be done in
282         * functionExit().
283         * returns address to patch with local variable size.
284        */
285        virtual int functionEntry(int argCount) = 0;
286
287        /* Emit a function epilog.
288         * Restore the old SP and FP register values.
289         * Return to the calling function.
290         * argCount - the number of arguments to the function.
291         * localVariableAddress - returned from functionEntry()
292         * localVariableSize - the size in bytes of the local variables.
293         */
294        virtual void functionExit(int argCount, int localVariableAddress,
295                                  int localVariableSize) = 0;
296
297        /* load immediate value to R0 */
298        virtual void li(int i, Type* pType) = 0;
299
300        /* load floating point immediate value to R0 */
301        virtual void lif(float f, Type* pType) = 0;
302
303        /* load double-precision floating point immediate value to R0 */
304        virtual void lid(double d, Type* pType) = 0;
305
306        /* Jump to a target, and return the address of the word that
307         * holds the target data, in case it needs to be fixed up later.
308         */
309        virtual int gjmp(int t) = 0;
310
311        /* Test R0 and jump to a target if the test succeeds.
312         * l = 0: je, l == 1: jne
313         * Return the address of the word that holds the targed data, in
314         * case it needs to be fixed up later.
315         */
316        virtual int gtst(bool l, int t) = 0;
317
318        /* Compare TOS against R0, and store the boolean result in R0.
319         * Pops TOS.
320         * op specifies the comparison.
321         */
322        virtual void gcmp(int op) = 0;
323
324        /* Perform the arithmetic op specified by op. TOS is the
325         * left argument, R0 is the right argument.
326         * Pops TOS.
327         */
328        virtual void genOp(int op) = 0;
329
330        /* Compare 0 against R0, and store the boolean result in R0.
331         * op specifies the comparison.
332         */
333        virtual void gUnaryCmp(int op) = 0;
334
335        /* Perform the arithmetic op specified by op. 0 is the
336         * left argument, R0 is the right argument.
337         */
338        virtual void genUnaryOp(int op) = 0;
339
340        /* Push R0 onto the stack.
341         */
342        virtual void pushR0() = 0;
343
344        /* Store R0 to the address stored in TOS.
345         * The TOS is popped.
346         * pPointerType is the type of the pointer (of the input R0).
347         */
348        virtual void storeR0ToTOS(Type* pPointerType) = 0;
349
350        /* Load R0 from the address stored in R0.
351         * pPointerType is the type of the pointer (of the input R0).
352         */
353        virtual void loadR0FromR0(Type* pPointerType) = 0;
354
355        /* Load the absolute address of a variable to R0.
356         * If ea <= LOCAL, then this is a local variable, or an
357         * argument, addressed relative to FP.
358         * else it is an absolute global address.
359         */
360        virtual void leaR0(int ea, Type* pPointerType) = 0;
361
362        /* Store R0 to a variable.
363         * If ea <= LOCAL, then this is a local variable, or an
364         * argument, addressed relative to FP.
365         * else it is an absolute global address.
366         */
367        virtual void storeR0(int ea) = 0;
368
369        /* load R0 from a variable.
370         * If ea <= LOCAL, then this is a local variable, or an
371         * argument, addressed relative to FP.
372         * else it is an absolute global address.
373         * If isIncDec is true, then the stored variable's value
374         * should be post-incremented or post-decremented, based
375         * on the value of op.
376         */
377        virtual void loadR0(int ea, bool isIncDec, int op, Type* pType) = 0;
378
379        /**
380         * Convert R0 to the given type.
381         */
382        virtual void convertR0(Type* pType) = 0;
383
384        /* Emit code to adjust the stack for a function call. Return the
385         * label for the address of the instruction that adjusts the
386         * stack size. This will be passed as argument "a" to
387         * endFunctionCallArguments.
388         */
389        virtual int beginFunctionCallArguments() = 0;
390
391        /* Emit code to store R0 to the stack at byte offset l.
392         */
393        virtual void storeR0ToArg(int l) = 0;
394
395        /* Patch the function call preamble.
396         * a is the address returned from beginFunctionCallArguments
397         * l is the number of bytes the arguments took on the stack.
398         * Typically you would also emit code to convert the argument
399         * list into whatever the native function calling convention is.
400         * On ARM for example you would pop the first 5 arguments into
401         * R0..R4
402         */
403        virtual void endFunctionCallArguments(int a, int l) = 0;
404
405        /* Emit a call to an unknown function. The argument "symbol" needs to
406         * be stored in the location where the address should go. It forms
407         * a chain. The address will be patched later.
408         * Return the address of the word that has to be patched.
409         */
410        virtual int callForward(int symbol, Type* pFunc) = 0;
411
412        /* Call a function using PC-relative addressing. t is the PC-relative
413         * address of the function. It has already been adjusted for the
414         * architectural jump offset, so just store it as-is.
415         */
416        virtual void callRelative(int t, Type* pFunc) = 0;
417
418        /* Call a function pointer. L is the number of bytes the arguments
419         * take on the stack. The address of the function is stored at
420         * location SP + l.
421         */
422        virtual void callIndirect(int l, Type* pFunc) = 0;
423
424        /* Adjust SP after returning from a function call. l is the
425         * number of bytes of arguments stored on the stack. isIndirect
426         * is true if this was an indirect call. (In which case the
427         * address of the function is stored at location SP + l.)
428         */
429        virtual void adjustStackAfterCall(int l, bool isIndirect) = 0;
430
431        /* Print a disassembly of the assembled code to out. Return
432         * non-zero if there is an error.
433         */
434        virtual int disassemble(FILE* out) = 0;
435
436        /* Generate a symbol at the current PC. t is the head of a
437         * linked list of addresses to patch.
438         */
439        virtual void gsym(int t) = 0;
440
441        /*
442         * Do any cleanup work required at the end of a compile.
443         * For example, an instruction cache might need to be
444         * invalidated.
445         * Return non-zero if there is an error.
446         */
447        virtual int finishCompile() = 0;
448
449        /**
450         * Adjust relative branches by this amount.
451         */
452        virtual int jumpOffset() = 0;
453
454        /**
455         * Stack alignment (in bytes) for this type of data
456         */
457        virtual size_t stackAlignment(Type* type) = 0;
458
459        /**
460         * Array element alignment (in bytes) for this type of data.
461         */
462        virtual size_t sizeOf(Type* type) = 0;
463
464    protected:
465        /*
466         * Output a byte. Handles all values, 0..ff.
467         */
468        void ob(int n) {
469            pCodeBuf->ob(n);
470        }
471
472        intptr_t o4(int data) {
473            return pCodeBuf->o4(data);
474        }
475
476        intptr_t getBase() {
477            return (intptr_t) pCodeBuf->getBase();
478        }
479
480        intptr_t getPC() {
481            return pCodeBuf->getPC();
482        }
483
484        intptr_t getSize() {
485            return pCodeBuf->getSize();
486        }
487
488        void error(const char* fmt,...) {
489            va_list ap;
490            va_start(ap, fmt);
491            mErrorSink->verror(fmt, ap);
492            va_end(ap);
493        }
494
495        void assert(bool test) {
496            if (!test) {
497                error("code generator assertion failed.");
498            }
499        }
500
501        void setR0Type(Type* pType) {
502            mExpressionStack.back() = pType;
503        }
504
505        Type* getR0Type() {
506            return mExpressionStack.back();
507        }
508
509        Type* getTOSType() {
510            return mExpressionStack[mExpressionStack.size()-2];
511        }
512
513        void pushType() {
514            mExpressionStack.push_back(NULL);
515        }
516
517        void popType() {
518            mExpressionStack.pop_back();
519        }
520
521        bool bitsSame(Type* pA, Type* pB) {
522            return collapseType(pA->tag) == collapseType(pB->tag);
523        }
524
525        TypeTag collapseType(TypeTag tag) {
526            static const TypeTag collapsedTag[] = {
527                    TY_INT, TY_INT, TY_VOID, TY_FLOAT, TY_DOUBLE, TY_INT,
528                    TY_VOID, TY_VOID};
529            return collapsedTag[tag];
530        }
531
532    private:
533        Vector<Type*> mExpressionStack;
534        CodeBuf* pCodeBuf;
535        ErrorSink* mErrorSink;
536    };
537
538#ifdef PROVIDE_ARM_CODEGEN
539
540    class ARMCodeGenerator : public CodeGenerator {
541    public:
542        ARMCodeGenerator() {}
543
544        virtual ~ARMCodeGenerator() {}
545
546        /* returns address to patch with local variable size
547        */
548        virtual int functionEntry(int argCount) {
549            LOG_API("functionEntry(%d);\n", argCount);
550            mStackUse = 0;
551            // sp -> arg4 arg5 ...
552            // Push our register-based arguments back on the stack
553            if (argCount > 0) {
554                int regArgCount = argCount <= 4 ? argCount : 4;
555                o4(0xE92D0000 | ((1 << argCount) - 1)); // stmfd    sp!, {}
556                mStackUse += regArgCount * 4;
557            }
558            // sp -> arg0 arg1 ...
559            o4(0xE92D4800); // stmfd sp!, {fp, lr}
560            mStackUse += 2 * 4;
561            // sp, fp -> oldfp, retadr, arg0 arg1 ....
562            o4(0xE1A0B00D); // mov    fp, sp
563            LOG_STACK("functionEntry: %d\n", mStackUse);
564            return o4(0xE24DD000); // sub    sp, sp, # <local variables>
565            // We don't know how many local variables we are going to use,
566            // but we will round the allocation up to a multiple of
567            // STACK_ALIGNMENT, so it won't affect the stack alignment.
568        }
569
570        virtual void functionExit(int argCount, int localVariableAddress, int localVariableSize) {
571            LOG_API("functionExit(%d, %d, %d);\n", argCount, localVariableAddress, localVariableSize);
572            // Round local variable size up to a multiple of stack alignment
573            localVariableSize = ((localVariableSize + STACK_ALIGNMENT - 1) /
574                STACK_ALIGNMENT) * STACK_ALIGNMENT;
575            // Patch local variable allocation code:
576            if (localVariableSize < 0 || localVariableSize > 255) {
577                error("localVariables out of range: %d", localVariableSize);
578            }
579            *(char*) (localVariableAddress) = localVariableSize;
580
581            // sp -> locals .... fp -> oldfp, retadr, arg0, arg1, ...
582            o4(0xE1A0E00B); // mov lr, fp
583            o4(0xE59BB000); // ldr fp, [fp]
584            o4(0xE28ED004); // add sp, lr, #4
585            // sp -> retadr, arg0, ...
586            o4(0xE8BD4000); // ldmfd    sp!, {lr}
587            // sp -> arg0 ....
588            if (argCount > 0) {
589                // We store the PC into the lr so we can adjust the sp before
590                // returning. We need to pull off the registers we pushed
591                // earlier. We don't need to actually store them anywhere,
592                // just adjust the stack.
593                int regArgCount = argCount <= 4 ? argCount : 4;
594                o4(0xE28DD000 | (regArgCount << 2)); // add sp, sp, #argCount << 2
595            }
596            o4(0xE12FFF1E); // bx lr
597        }
598
599        /* load immediate value */
600        virtual void li(int t, Type* pType) {
601            LOG_API("li(%d);\n", t);
602            if (t >= 0 && t < 255) {
603                 o4(0xE3A00000 + t); // mov    r0, #0
604            } else if (t >= -256 && t < 0) {
605                // mvn means move constant ^ ~0
606                o4(0xE3E00001 - t); // mvn    r0, #0
607            } else {
608                  o4(0xE51F0000); //         ldr    r0, .L3
609                  o4(0xEA000000); //         b .L99
610                  o4(t);          // .L3:   .word 0
611                                  // .L99:
612            }
613            setR0Type(pType);
614        }
615
616        virtual void lif(float f, Type* pType) {
617            union { float f; int i; } converter;
618            converter.f = f;
619            li(converter.i, pType);
620        }
621
622        virtual void lid(double d, Type* pType) {
623            union { double d; int i[2]; } converter;
624            converter.d = d;
625            error("lid: unimplemented");
626            setR0Type(pType);
627        }
628
629        virtual int gjmp(int t) {
630            LOG_API("gjmp(%d);\n", t);
631            return o4(0xEA000000 | encodeAddress(t)); // b .L33
632        }
633
634        /* l = 0: je, l == 1: jne */
635        virtual int gtst(bool l, int t) {
636            LOG_API("gtst(%d, %d);\n", l, t);
637            o4(0xE3500000); // cmp r0,#0
638            int branch = l ? 0x1A000000 : 0x0A000000; // bne : beq
639            return o4(branch | encodeAddress(t));
640        }
641
642        virtual void gcmp(int op) {
643            LOG_API("gcmp(%d);\n", op);
644            o4(0xE8BD0002);  // ldmfd   sp!,{r1}
645            mStackUse -= 4;
646            o4(0xE1510000); // cmp r1, r1
647            switch(op) {
648            case OP_EQUALS:
649                o4(0x03A00001); // moveq r0,#1
650                o4(0x13A00000); // movne r0,#0
651                break;
652            case OP_NOT_EQUALS:
653                o4(0x03A00000); // moveq r0,#0
654                o4(0x13A00001); // movne r0,#1
655                break;
656            case OP_LESS_EQUAL:
657                o4(0xD3A00001); // movle r0,#1
658                o4(0xC3A00000); // movgt r0,#0
659                break;
660            case OP_GREATER:
661                o4(0xD3A00000); // movle r0,#0
662                o4(0xC3A00001); // movgt r0,#1
663                break;
664            case OP_GREATER_EQUAL:
665                o4(0xA3A00001); // movge r0,#1
666                o4(0xB3A00000); // movlt r0,#0
667                break;
668            case OP_LESS:
669                o4(0xA3A00000); // movge r0,#0
670                o4(0xB3A00001); // movlt r0,#1
671                break;
672            default:
673                error("Unknown comparison op %d", op);
674                break;
675            }
676            popType();
677        }
678
679        virtual void genOp(int op) {
680            LOG_API("genOp(%d);\n", op);
681            o4(0xE8BD0002);  // ldmfd   sp!,{r1}
682            mStackUse -= 4;
683            switch(op) {
684            case OP_MUL:
685                o4(0x0E0000091); // mul     r0,r1,r0
686                break;
687            case OP_DIV:
688                callRuntime(runtime_DIV);
689                break;
690            case OP_MOD:
691                callRuntime(runtime_MOD);
692                break;
693            case OP_PLUS:
694                o4(0xE0810000);  // add     r0,r1,r0
695                break;
696            case OP_MINUS:
697                o4(0xE0410000);  // sub     r0,r1,r0
698                break;
699            case OP_SHIFT_LEFT:
700                o4(0xE1A00011);  // lsl     r0,r1,r0
701                break;
702            case OP_SHIFT_RIGHT:
703                o4(0xE1A00051);  // asr     r0,r1,r0
704                break;
705            case OP_BIT_AND:
706                o4(0xE0010000);  // and     r0,r1,r0
707                break;
708            case OP_BIT_XOR:
709                o4(0xE0210000);  // eor     r0,r1,r0
710                break;
711            case OP_BIT_OR:
712                o4(0xE1810000);  // orr     r0,r1,r0
713                break;
714            case OP_BIT_NOT:
715                o4(0xE1E00000);  // mvn     r0, r0
716                break;
717            default:
718                error("Unimplemented op %d\n", op);
719                break;
720            }
721            popType();
722        }
723
724        virtual void gUnaryCmp(int op) {
725            LOG_API("gcmp(%d);\n", op);
726            o4(0xE3A01000);  // mov    r1, #0
727            o4(0xE1510000); // cmp r1, r1
728            switch(op) {
729            case OP_NOT_EQUALS:
730                o4(0x03A00000); // moveq r0,#0
731                o4(0x13A00001); // movne r0,#1
732                break;
733             default:
734                error("Unknown unary comparison op %d", op);
735                break;
736            }
737        }
738
739        virtual void genUnaryOp(int op) {
740            LOG_API("genOp(%d);\n", op);
741            switch(op) {
742            case OP_PLUS:
743                // Do nothing
744                break;
745            case OP_MINUS:
746                o4(0xE3A01000);  // mov    r1, #0
747                o4(0xE0410000);  // sub     r0,r1,r0
748                break;
749            case OP_BIT_NOT:
750                o4(0xE1E00000);  // mvn     r0, r0
751                break;
752            default:
753                error("Unknown unary op %d\n", op);
754                break;
755            }
756        }
757
758        virtual void pushR0() {
759            LOG_API("pushR0();\n");
760            o4(0xE92D0001);  // stmfd   sp!,{r0}
761            mStackUse += 4;
762            pushType();
763            LOG_STACK("pushR0: %d\n", mStackUse);
764        }
765
766        virtual void storeR0ToTOS(Type* pPointerType) {
767            LOG_API("storeR0ToTOS(%d);\n", isInt);
768            assert(pPointerType->tag == TY_POINTER);
769            o4(0xE8BD0002);  // ldmfd   sp!,{r1}
770            mStackUse -= 4;
771            switch (pPointerType->pHead->tag) {
772                case TY_INT:
773                    o4(0xE5810000); // str r0, [r1]
774                    break;
775                case TY_CHAR:
776                    o4(0xE5C10000); // strb r0, [r1]
777                    break;
778                default:
779                    error("storeR0ToTOS: unimplemented type");
780                    break;
781            }
782            popType();
783        }
784
785        virtual void loadR0FromR0(Type* pPointerType) {
786            LOG_API("loadR0FromR0(%d);\n", pPointerType);
787            assert(pPointerType->tag == TY_POINTER);
788            switch (pPointerType->pHead->tag) {
789                case TY_INT:
790                    o4(0xE5900000); // ldr r0, [r0]
791                    break;
792                case TY_CHAR:
793                    o4(0xE5D00000); // ldrb r0, [r0]
794                    break;
795                default:
796                    error("loadR0FromR0: unimplemented type");
797                    break;
798            }
799            setR0Type(pPointerType->pHead);
800        }
801
802        virtual void leaR0(int ea, Type* pPointerType) {
803            LOG_API("leaR0(%d);\n", ea);
804            if (ea < LOCAL) {
805                // Local, fp relative
806                if (ea < -1023 || ea > 1023 || ((ea & 3) != 0)) {
807                    error("Offset out of range: %08x", ea);
808                }
809                if (ea < 0) {
810                    o4(0xE24B0F00 | (0xff & ((-ea) >> 2))); // sub    r0, fp, #ea
811                } else {
812                    o4(0xE28B0F00 | (0xff & (ea >> 2))); // add    r0, fp, #ea
813                }
814            } else {
815                // Global, absolute.
816                o4(0xE59F0000); //        ldr    r0, .L1
817                o4(0xEA000000); //        b .L99
818                o4(ea);         // .L1:   .word 0
819                                // .L99:
820            }
821            setR0Type(pPointerType);
822        }
823
824        virtual void storeR0(int ea) {
825            LOG_API("storeR0(%d);\n", ea);
826            if (ea < LOCAL) {
827                // Local, fp relative
828                if (ea < -4095 || ea > 4095) {
829                    error("Offset out of range: %08x", ea);
830                }
831                if (ea < 0) {
832                    o4(0xE50B0000 | (0xfff & (-ea))); // str r0, [fp,#-ea]
833                } else {
834                    o4(0xE58B0000 | (0xfff & ea)); // str r0, [fp,#ea]
835                }
836            } else{
837                // Global, absolute
838                o4(0xE59F1000); //         ldr r1, .L1
839                o4(0xEA000000); //         b .L99
840                o4(ea);         // .L1:    .word 0
841                o4(0xE5810000); // .L99:   str r0, [r1]
842            }
843        }
844
845        virtual void loadR0(int ea, bool isIncDec, int op, Type* pType) {
846            LOG_API("loadR0(%d, %d, %d);\n", ea, isIncDec, op);
847            if (ea < LOCAL) {
848                // Local, fp relative
849                if (ea < -4095 || ea > 4095) {
850                    error("Offset out of range: %08x", ea);
851                }
852                if (ea < 0) {
853                    o4(0xE51B0000 | (0xfff & (-ea))); // ldr r0, [fp,#-ea]
854                } else {
855                    o4(0xE59B0000 | (0xfff & ea));    // ldr r0, [fp,#ea]
856                }
857            } else {
858                // Global, absolute
859                o4(0xE59F2000); //        ldr r2, .L1
860                o4(0xEA000000); //        b .L99
861                o4(ea);         // .L1:   .word ea
862                o4(0xE5920000); // .L99:  ldr r0, [r2]
863            }
864
865            if (isIncDec) {
866                switch (op) {
867                case OP_INCREMENT:
868                    o4(0xE2801001); // add r1, r0, #1
869                    break;
870                case OP_DECREMENT:
871                    o4(0xE2401001); // sub r1, r0, #1
872                    break;
873                default:
874                    error("unknown opcode: %d", op);
875                }
876                if (ea < LOCAL) {
877                    // Local, fp relative
878                    // Don't need range check, was already checked above
879                    if (ea < 0) {
880                        o4(0xE50B1000 | (0xfff & (-ea))); // str r1, [fp,#-ea]
881                    } else {
882                        o4(0xE58B1000 | (0xfff & ea));    // str r1, [fp,#ea]
883                    }
884                } else{
885                    // Global, absolute
886                    // r2 is already set up from before.
887                    o4(0xE5821000); // str r1, [r2]
888               }
889            }
890            setR0Type(pType);
891        }
892
893        virtual void convertR0(Type* pType){
894            if (bitsSame(pType, getR0Type())) {
895                return;
896            }
897            error("Incompatible types");
898        }
899
900        virtual int beginFunctionCallArguments() {
901            LOG_API("beginFunctionCallArguments();\n");
902            return o4(0xE24DDF00); // Placeholder
903        }
904
905        virtual void storeR0ToArg(int l) {
906            LOG_API("storeR0ToArg(%d);\n", l);
907            if (l < 0 || l > 4096-4) {
908                error("l out of range for stack offset: 0x%08x", l);
909            }
910            o4(0xE58D0000 + l); // str r0, [sp, #4]
911        }
912
913        virtual void endFunctionCallArguments(int a, int l) {
914            LOG_API("endFunctionCallArguments(0x%08x, %d);\n", a, l);
915            int argCount = l >> 2;
916            int argumentStackUse = l;
917            if (argCount > 0) {
918                int regArgCount = argCount > 4 ? 4 : argCount;
919                argumentStackUse -= regArgCount * 4;
920                o4(0xE8BD0000 | ((1 << regArgCount) - 1)); // ldmfd   sp!,{}
921            }
922            mStackUse += argumentStackUse;
923
924            // Align stack.
925            int missalignment = mStackUse - ((mStackUse / STACK_ALIGNMENT)
926                    * STACK_ALIGNMENT);
927            mStackAlignmentAdjustment = 0;
928            if (missalignment > 0) {
929                mStackAlignmentAdjustment = STACK_ALIGNMENT - missalignment;
930            }
931            l += mStackAlignmentAdjustment;
932
933            if (l < 0 || l > 0x3FC) {
934                error("L out of range for stack adjustment: 0x%08x", l);
935            }
936            * (int*) a = 0xE24DDF00 | (l >> 2); // sub    sp, sp, #0 << 2
937            mStackUse += mStackAlignmentAdjustment;
938            LOG_STACK("endFunctionCallArguments mStackUse: %d, mStackAlignmentAdjustment %d\n",
939                      mStackUse, mStackAlignmentAdjustment);
940        }
941
942        virtual int callForward(int symbol, Type* pFunc) {
943            LOG_API("callForward(%d);\n", symbol);
944            setR0Type(pFunc->pHead);
945            // Forward calls are always short (local)
946            return o4(0xEB000000 | encodeAddress(symbol));
947        }
948
949        virtual void callRelative(int t, Type* pFunc) {
950            LOG_API("callRelative(%d);\n", t);
951            setR0Type(pFunc->pHead);
952            int abs = t + getPC() + jumpOffset();
953            LOG_API("abs=%d (0x%08x)\n", abs, abs);
954            if (t >= - (1 << 25) && t < (1 << 25)) {
955                o4(0xEB000000 | encodeAddress(t));
956            } else {
957                // Long call.
958                o4(0xE59FC000); //         ldr    r12, .L1
959                o4(0xEA000000); //         b .L99
960                o4(t - 12);     // .L1:    .word 0
961                o4(0xE08CC00F); // .L99:   add r12,pc
962                o4(0xE12FFF3C); //         blx r12
963           }
964        }
965
966        virtual void callIndirect(int l, Type* pFunc) {
967            LOG_API("callIndirect(%d);\n", l);
968            setR0Type(pFunc->pHead);
969            int argCount = l >> 2;
970            int poppedArgs = argCount > 4 ? 4 : argCount;
971            int adjustedL = l - (poppedArgs << 2) + mStackAlignmentAdjustment;
972            if (adjustedL < 0 || adjustedL > 4096-4) {
973                error("l out of range for stack offset: 0x%08x", l);
974            }
975            o4(0xE59DC000 | (0xfff & adjustedL)); // ldr    r12, [sp,#adjustedL]
976            o4(0xE12FFF3C); // blx r12
977        }
978
979        virtual void adjustStackAfterCall(int l, bool isIndirect) {
980            LOG_API("adjustStackAfterCall(%d, %d);\n", l, isIndirect);
981            int argCount = l >> 2;
982            int stackArgs = argCount > 4 ? argCount - 4 : 0;
983            int stackUse =  stackArgs + (isIndirect ? 1 : 0)
984                + (mStackAlignmentAdjustment >> 2);
985            if (stackUse) {
986                if (stackUse < 0 || stackUse > 255) {
987                    error("L out of range for stack adjustment: 0x%08x", l);
988                }
989                o4(0xE28DDF00 | stackUse); // add    sp, sp, #stackUse << 2
990                mStackUse -= stackUse * 4;
991                LOG_STACK("adjustStackAfterCall: %d\n", mStackUse);
992            }
993        }
994
995        virtual int jumpOffset() {
996            return 8;
997        }
998
999        /* output a symbol and patch all calls to it */
1000        virtual void gsym(int t) {
1001            LOG_API("gsym(0x%x)\n", t);
1002            int n;
1003            int base = getBase();
1004            int pc = getPC();
1005            LOG_API("pc = 0x%x\n", pc);
1006            while (t) {
1007                int data = * (int*) t;
1008                int decodedOffset = ((BRANCH_REL_ADDRESS_MASK & data) << 2);
1009                if (decodedOffset == 0) {
1010                    n = 0;
1011                } else {
1012                    n = base + decodedOffset; /* next value */
1013                }
1014                *(int *) t = (data & ~BRANCH_REL_ADDRESS_MASK)
1015                    | encodeRelAddress(pc - t - 8);
1016                t = n;
1017            }
1018        }
1019
1020        virtual int finishCompile() {
1021#if defined(__arm__)
1022            const long base = long(getBase());
1023            const long curr = long(getPC());
1024            int err = cacheflush(base, curr, 0);
1025            return err;
1026#else
1027            return 0;
1028#endif
1029        }
1030
1031        virtual int disassemble(FILE* out) {
1032#ifdef ENABLE_ARM_DISASSEMBLY
1033            disasmOut = out;
1034            disasm_interface_t  di;
1035            di.di_readword = disassemble_readword;
1036            di.di_printaddr = disassemble_printaddr;
1037            di.di_printf = disassemble_printf;
1038
1039            int base = getBase();
1040            int pc = getPC();
1041            for(int i = base; i < pc; i += 4) {
1042                fprintf(out, "%08x: %08x  ", i, *(int*) i);
1043                ::disasm(&di, i, 0);
1044            }
1045#endif
1046            return 0;
1047        }
1048
1049        /**
1050         * Stack alignment (in bytes) for this type of data
1051         */
1052        virtual size_t stackAlignment(Type* pType){
1053            switch(pType->tag) {
1054                case TY_DOUBLE:
1055                    return 8;
1056                default:
1057                    return 4;
1058            }
1059        }
1060
1061        /**
1062         * Array element alignment (in bytes) for this type of data.
1063         */
1064        virtual size_t sizeOf(Type* pType){
1065            switch(pType->tag) {
1066                case TY_INT:
1067                    return 4;
1068                case TY_CHAR:
1069                    return 1;
1070                default:
1071                    return 0;
1072                case TY_FLOAT:
1073                    return 4;
1074                case TY_DOUBLE:
1075                    return 8;
1076                case TY_POINTER:
1077                    return 4;
1078            }
1079        }
1080    private:
1081        static FILE* disasmOut;
1082
1083        static u_int
1084        disassemble_readword(u_int address)
1085        {
1086            return(*((u_int *)address));
1087        }
1088
1089        static void
1090        disassemble_printaddr(u_int address)
1091        {
1092            fprintf(disasmOut, "0x%08x", address);
1093        }
1094
1095        static void
1096        disassemble_printf(const char *fmt, ...) {
1097            va_list ap;
1098            va_start(ap, fmt);
1099            vfprintf(disasmOut, fmt, ap);
1100            va_end(ap);
1101        }
1102
1103        static const int BRANCH_REL_ADDRESS_MASK = 0x00ffffff;
1104
1105        /** Encode a relative address that might also be
1106         * a label.
1107         */
1108        int encodeAddress(int value) {
1109            int base = getBase();
1110            if (value >= base && value <= getPC() ) {
1111                // This is a label, encode it relative to the base.
1112                value = value - base;
1113            }
1114            return encodeRelAddress(value);
1115        }
1116
1117        int encodeRelAddress(int value) {
1118            return BRANCH_REL_ADDRESS_MASK & (value >> 2);
1119        }
1120
1121        typedef int (*int2FnPtr)(int a, int b);
1122        void callRuntime(int2FnPtr fn) {
1123            o4(0xE59F2000); // ldr    r2, .L1
1124            o4(0xEA000000); // b      .L99
1125            o4((int) fn);   //.L1:  .word  fn
1126            o4(0xE12FFF32); //.L99: blx    r2
1127        }
1128
1129        static int runtime_DIV(int a, int b) {
1130            return b / a;
1131        }
1132
1133        static int runtime_MOD(int a, int b) {
1134            return b % a;
1135        }
1136
1137        static const int STACK_ALIGNMENT = 8;
1138        int mStackUse;
1139        // This variable holds the amount we adjusted the stack in the most
1140        // recent endFunctionCallArguments call. It's examined by the
1141        // following adjustStackAfterCall call.
1142        int mStackAlignmentAdjustment;
1143    };
1144
1145#endif // PROVIDE_ARM_CODEGEN
1146
1147#ifdef PROVIDE_X86_CODEGEN
1148
1149    class X86CodeGenerator : public CodeGenerator {
1150    public:
1151        X86CodeGenerator() {}
1152        virtual ~X86CodeGenerator() {}
1153
1154        /* returns address to patch with local variable size
1155        */
1156        virtual int functionEntry(int argCount) {
1157            o(0xe58955); /* push   %ebp, mov %esp, %ebp */
1158            return oad(0xec81, 0); /* sub $xxx, %esp */
1159        }
1160
1161        virtual void functionExit(int argCount, int localVariableAddress, int localVariableSize) {
1162            o(0xc3c9); /* leave, ret */
1163            *(int *) localVariableAddress = localVariableSize; /* save local variables */
1164        }
1165
1166        /* load immediate value */
1167        virtual void li(int i, Type* pType) {
1168            oad(0xb8, i); /* mov $xx, %eax */
1169            setR0Type(pType);
1170        }
1171
1172        virtual void lif(float f, Type* pType) {
1173            union { float f; int i; } converter;
1174            converter.f = f;
1175            setR0Type(pType);
1176            error("unimplemented: lif");
1177        }
1178
1179        virtual void lid(double d, Type* pType) {
1180            union { double d; int i[2]; } converter;
1181            converter.d = d;
1182            setR0Type(pType);
1183            error("unimplemented: lid");
1184        }
1185
1186        virtual int gjmp(int t) {
1187            return psym(0xe9, t);
1188        }
1189
1190        /* l = 0: je, l == 1: jne */
1191        virtual int gtst(bool l, int t) {
1192            o(0x0fc085); /* test %eax, %eax, je/jne xxx */
1193            return psym(0x84 + l, t);
1194        }
1195
1196        virtual void gcmp(int op) {
1197            int t = decodeOp(op);
1198            o(0x59); /* pop %ecx */
1199            o(0xc139); /* cmp %eax,%ecx */
1200            li(0, NULL);
1201            o(0x0f); /* setxx %al */
1202            o(t + 0x90);
1203            o(0xc0);
1204            popType();
1205        }
1206
1207        virtual void genOp(int op) {
1208            o(0x59); /* pop %ecx */
1209            o(decodeOp(op));
1210            if (op == OP_MOD)
1211                o(0x92); /* xchg %edx, %eax */
1212            popType();
1213        }
1214
1215        virtual void gUnaryCmp(int op) {
1216            oad(0xb9, 0); /* movl $0, %ecx */
1217            int t = decodeOp(op);
1218            o(0xc139); /* cmp %eax,%ecx */
1219            li(0, NULL);
1220            o(0x0f); /* setxx %al */
1221            o(t + 0x90);
1222            o(0xc0);
1223        }
1224
1225        virtual void genUnaryOp(int op) {
1226            oad(0xb9, 0); /* movl $0, %ecx */
1227            o(decodeOp(op));
1228        }
1229
1230        virtual void pushR0() {
1231            o(0x50); /* push %eax */
1232            pushType();
1233        }
1234
1235        virtual void storeR0ToTOS(Type* pPointerType) {
1236            assert(pPointerType->tag == TY_POINTER);
1237            o(0x59); /* pop %ecx */
1238            popType();
1239            switch (pPointerType->pHead->tag) {
1240                case TY_INT:
1241                    o(0x0189); /* movl %eax/%al, (%ecx) */
1242                    break;
1243                case TY_CHAR:
1244                    o(0x0188); /* movl %eax/%al, (%ecx) */
1245                    break;
1246                default:
1247                    error("storeR0ToTOS: unsupported type");
1248                    break;
1249            }
1250        }
1251
1252        virtual void loadR0FromR0(Type* pPointerType) {
1253            assert(pPointerType->tag == TY_POINTER);
1254            switch (pPointerType->pHead->tag) {
1255                case TY_INT:
1256                    o(0x8b); /* mov (%eax), %eax */
1257                    break;
1258                case TY_CHAR:
1259                    o(0xbe0f); /* movsbl (%eax), %eax */
1260                    break;
1261                default:
1262                    error("loadR0FromR0: unsupported type");
1263                    break;
1264            }
1265            ob(0); /* add zero in code */
1266            setR0Type(pPointerType->pHead);
1267        }
1268
1269        virtual void leaR0(int ea, Type* pPointerType) {
1270            gmov(10, ea); /* leal EA, %eax */
1271            setR0Type(pPointerType);
1272        }
1273
1274        virtual void storeR0(int ea) {
1275            gmov(6, ea); /* mov %eax, EA */
1276        }
1277
1278        virtual void loadR0(int ea, bool isIncDec, int op, Type* pType) {
1279            gmov(8, ea); /* mov EA, %eax */
1280            if (isIncDec) {
1281                /* Implement post-increment or post decrement.
1282                 */
1283                gmov(0, ea); /* 83 ADD */
1284                o(decodeOp(op));
1285            }
1286            setR0Type(pType);
1287        }
1288
1289        virtual void convertR0(Type* pType){
1290            if (bitsSame(pType, getR0Type())) {
1291                return;
1292            }
1293            error("convertR0: unsupported conversion %d <- %d", pType->tag,
1294                  getR0Type()->tag);
1295        }
1296
1297        virtual int beginFunctionCallArguments() {
1298            return oad(0xec81, 0); /* sub $xxx, %esp */
1299        }
1300
1301        virtual void storeR0ToArg(int l) {
1302            oad(0x248489, l); /* movl %eax, xxx(%esp) */
1303        }
1304
1305        virtual void endFunctionCallArguments(int a, int l) {
1306            * (int*) a = l;
1307        }
1308
1309        virtual int callForward(int symbol, Type* pFunc) {
1310            setR0Type(pFunc->pHead);
1311            return psym(0xe8, symbol); /* call xxx */
1312        }
1313
1314        virtual void callRelative(int t, Type* pFunc) {
1315            setR0Type(pFunc->pHead);
1316            psym(0xe8, t); /* call xxx */
1317        }
1318
1319        virtual void callIndirect(int l, Type* pFunc) {
1320            setR0Type(pFunc->pHead);
1321            oad(0x2494ff, l); /* call *xxx(%esp) */
1322        }
1323
1324        virtual void adjustStackAfterCall(int l, bool isIndirect) {
1325            if (isIndirect) {
1326                l += 4;
1327            }
1328            if (l > 0) {
1329                oad(0xc481, l); /* add $xxx, %esp */
1330            }
1331        }
1332
1333        virtual int jumpOffset() {
1334            return 5;
1335        }
1336
1337        virtual int disassemble(FILE* out) {
1338            return 0;
1339        }
1340
1341        /* output a symbol and patch all calls to it */
1342        virtual void gsym(int t) {
1343            int n;
1344            int pc = getPC();
1345            while (t) {
1346                n = *(int *) t; /* next value */
1347                *(int *) t = pc - t - 4;
1348                t = n;
1349            }
1350        }
1351
1352        virtual int finishCompile() {
1353            size_t pagesize = 4096;
1354            size_t base = (size_t) getBase() & ~ (pagesize - 1);
1355            size_t top =  ((size_t) getPC() + pagesize - 1) & ~ (pagesize - 1);
1356            int err = mprotect((void*) base, top - base, PROT_READ | PROT_WRITE | PROT_EXEC);
1357            if (err) {
1358               error("mprotect() failed: %d", errno);
1359            }
1360            return err;
1361        }
1362
1363        /**
1364         * Stack alignment (in bytes) for this type of data
1365         */
1366        virtual size_t stackAlignment(Type* pType){
1367            switch(pType->tag) {
1368                case TY_DOUBLE:
1369                    return 8;
1370                default:
1371                    return 4;
1372            }
1373        }
1374
1375        /**
1376         * Array element alignment (in bytes) for this type of data.
1377         */
1378        virtual size_t sizeOf(Type* pType){
1379            switch(pType->tag) {
1380                case TY_INT:
1381                    return 4;
1382                case TY_CHAR:
1383                    return 1;
1384                default:
1385                    return 0;
1386                case TY_FLOAT:
1387                    return 4;
1388                case TY_DOUBLE:
1389                    return 8;
1390                case TY_POINTER:
1391                    return 4;
1392            }
1393        }
1394
1395    private:
1396
1397        /** Output 1 to 4 bytes.
1398         *
1399         */
1400        void o(int n) {
1401            /* cannot use unsigned, so we must do a hack */
1402            while (n && n != -1) {
1403                ob(n & 0xff);
1404                n = n >> 8;
1405            }
1406        }
1407
1408        /* psym is used to put an instruction with a data field which is a
1409         reference to a symbol. It is in fact the same as oad ! */
1410        int psym(int n, int t) {
1411            return oad(n, t);
1412        }
1413
1414        /* instruction + address */
1415        int oad(int n, int t) {
1416            o(n);
1417            int result = getPC();
1418            o4(t);
1419            return result;
1420        }
1421
1422
1423        static const int operatorHelper[];
1424
1425        int decodeOp(int op) {
1426            if (op < 0 || op > OP_COUNT) {
1427                error("Out-of-range operator: %d\n", op);
1428                op = 0;
1429            }
1430            return operatorHelper[op];
1431        }
1432
1433        void gmov(int l, int t) {
1434            o(l + 0x83);
1435            oad((t > -LOCAL && t < LOCAL) << 7 | 5, t);
1436        }
1437    };
1438
1439#endif // PROVIDE_X86_CODEGEN
1440
1441#ifdef PROVIDE_TRACE_CODEGEN
1442    class TraceCodeGenerator : public CodeGenerator {
1443    private:
1444        CodeGenerator* mpBase;
1445
1446    public:
1447        TraceCodeGenerator(CodeGenerator* pBase) {
1448            mpBase = pBase;
1449        }
1450
1451        virtual ~TraceCodeGenerator() {
1452            delete mpBase;
1453        }
1454
1455        virtual void init(CodeBuf* pCodeBuf) {
1456            mpBase->init(pCodeBuf);
1457        }
1458
1459        void setErrorSink(ErrorSink* pErrorSink) {
1460            mpBase->setErrorSink(pErrorSink);
1461        }
1462
1463        /* returns address to patch with local variable size
1464        */
1465        virtual int functionEntry(int argCount) {
1466            int result = mpBase->functionEntry(argCount);
1467            fprintf(stderr, "functionEntry(%d) -> %d\n", argCount, result);
1468            return result;
1469        }
1470
1471        virtual void functionExit(int argCount, int localVariableAddress, int localVariableSize) {
1472            fprintf(stderr, "functionExit(%d, %d, %d)\n",
1473                    argCount, localVariableAddress, localVariableSize);
1474            mpBase->functionExit(argCount, localVariableAddress, localVariableSize);
1475        }
1476
1477        /* load immediate value */
1478        virtual void li(int t, Type* pType) {
1479            fprintf(stderr, "li(%d)\n", t);
1480            mpBase->li(t, pType);
1481        }
1482
1483        virtual void lif(float f, Type* pType) {
1484            fprintf(stderr, "lif(%g)\n", f);
1485            mpBase->lif(f, pType);
1486        }
1487
1488        virtual void lid(double d, Type* pType) {
1489            fprintf(stderr, "lid(%g)\n", d);
1490            mpBase->lid(d, pType);
1491        }
1492
1493        virtual int gjmp(int t) {
1494            int result = mpBase->gjmp(t);
1495            fprintf(stderr, "gjmp(%d) = %d\n", t, result);
1496            return result;
1497        }
1498
1499        /* l = 0: je, l == 1: jne */
1500        virtual int gtst(bool l, int t) {
1501            int result = mpBase->gtst(l, t);
1502            fprintf(stderr, "gtst(%d,%d) = %d\n", l, t, result);
1503            return result;
1504        }
1505
1506        virtual void gcmp(int op) {
1507            fprintf(stderr, "gcmp(%d)\n", op);
1508            mpBase->gcmp(op);
1509        }
1510
1511        virtual void genOp(int op) {
1512            fprintf(stderr, "genOp(%d)\n", op);
1513            mpBase->genOp(op);
1514        }
1515
1516
1517        virtual void gUnaryCmp(int op) {
1518            fprintf(stderr, "gUnaryCmp(%d)\n", op);
1519            mpBase->gUnaryCmp(op);
1520        }
1521
1522        virtual void genUnaryOp(int op) {
1523            fprintf(stderr, "genUnaryOp(%d)\n", op);
1524            mpBase->genUnaryOp(op);
1525        }
1526
1527        virtual void pushR0() {
1528            fprintf(stderr, "pushR0()\n");
1529            mpBase->pushR0();
1530        }
1531
1532        virtual void storeR0ToTOS(Type* pPointerType) {
1533            fprintf(stderr, "storeR0ToTOS(%d)\n", pPointerType->pHead->tag);
1534            mpBase->storeR0ToTOS(pPointerType);
1535        }
1536
1537        virtual void loadR0FromR0(Type* pPointerType) {
1538            fprintf(stderr, "loadR0FromR0(%d)\n", pPointerType->pHead->tag);
1539            mpBase->loadR0FromR0(pPointerType);
1540        }
1541
1542        virtual void leaR0(int ea, Type* pPointerType) {
1543            fprintf(stderr, "leaR0(%d)\n", ea);
1544            mpBase->leaR0(ea, pPointerType);
1545        }
1546
1547        virtual void storeR0(int ea) {
1548            fprintf(stderr, "storeR0(%d)\n", ea);
1549            mpBase->storeR0(ea);
1550        }
1551
1552        virtual void loadR0(int ea, bool isIncDec, int op, Type* pType) {
1553            fprintf(stderr, "loadR0(%d, %d, %d)\n", ea, isIncDec, op);
1554            mpBase->loadR0(ea, isIncDec, op, pType);
1555        }
1556
1557        virtual void convertR0(Type* pType){
1558            fprintf(stderr, "convertR0(pType)\n");
1559            mpBase->convertR0(pType);
1560        }
1561
1562        virtual int beginFunctionCallArguments() {
1563            int result = mpBase->beginFunctionCallArguments();
1564            fprintf(stderr, "beginFunctionCallArguments() = %d\n", result);
1565            return result;
1566        }
1567
1568        virtual void storeR0ToArg(int l) {
1569            fprintf(stderr, "storeR0ToArg(%d)\n", l);
1570            mpBase->storeR0ToArg(l);
1571        }
1572
1573        virtual void endFunctionCallArguments(int a, int l) {
1574            fprintf(stderr, "endFunctionCallArguments(%d, %d)\n", a, l);
1575            mpBase->endFunctionCallArguments(a, l);
1576        }
1577
1578        virtual int callForward(int symbol, Type* pFunc) {
1579            int result = mpBase->callForward(symbol, pFunc);
1580            fprintf(stderr, "callForward(%d) = %d\n", symbol, result);
1581            return result;
1582        }
1583
1584        virtual void callRelative(int t, Type* pFunc) {
1585            fprintf(stderr, "callRelative(%d)\n", t);
1586            mpBase->callRelative(t, pFunc);
1587        }
1588
1589        virtual void callIndirect(int l, Type* pFunc) {
1590            fprintf(stderr, "callIndirect(%d)\n", l);
1591            mpBase->callIndirect(l, pFunc);
1592        }
1593
1594        virtual void adjustStackAfterCall(int l, bool isIndirect) {
1595            fprintf(stderr, "adjustStackAfterCall(%d, %d)\n", l, isIndirect);
1596            mpBase->adjustStackAfterCall(l, isIndirect);
1597        }
1598
1599        virtual int jumpOffset() {
1600            return mpBase->jumpOffset();
1601        }
1602
1603        virtual int disassemble(FILE* out) {
1604            return mpBase->disassemble(out);
1605        }
1606
1607        /* output a symbol and patch all calls to it */
1608        virtual void gsym(int t) {
1609            fprintf(stderr, "gsym(%d)\n", t);
1610            mpBase->gsym(t);
1611        }
1612
1613        virtual int finishCompile() {
1614            int result = mpBase->finishCompile();
1615            fprintf(stderr, "finishCompile() = %d\n", result);
1616            return result;
1617        }
1618
1619        /**
1620         * Stack alignment (in bytes) for this type of data
1621         */
1622        virtual size_t stackAlignment(Type* pType){
1623            return mpBase->stackAlignment(pType);
1624        }
1625
1626        /**
1627         * Array element alignment (in bytes) for this type of data.
1628         */
1629        virtual size_t sizeOf(Type* pType){
1630            return mpBase->sizeOf(pType);
1631        }
1632    };
1633
1634#endif // PROVIDE_TRACE_CODEGEN
1635
1636    class Arena {
1637    public:
1638        // Used to record a given allocation amount.
1639        // Used:
1640        // Mark mark = arena.mark();
1641        // ... lots of arena.allocate()
1642        // arena.free(mark);
1643
1644        struct Mark {
1645            size_t chunk;
1646            size_t offset;
1647        };
1648
1649        Arena() {
1650            mCurrentChunk = 0;
1651            Chunk start(CHUNK_SIZE);
1652            mData.push_back(start);
1653        }
1654
1655        ~Arena() {
1656            for(size_t i = 0; i < mData.size(); i++) {
1657                mData[i].free();
1658            }
1659        }
1660
1661        // Alloc using the standard alignment size safe for any variable
1662        void* alloc(size_t size) {
1663            return alloc(size, 8);
1664        }
1665
1666        Mark mark(){
1667            Mark result;
1668            result.chunk = mCurrentChunk;
1669            result.offset = mData[mCurrentChunk].mOffset;
1670            return result;
1671        }
1672
1673        void freeToMark(const Mark& mark) {
1674            mCurrentChunk = mark.chunk;
1675            mData[mCurrentChunk].mOffset = mark.offset;
1676        }
1677
1678    private:
1679        // Allocate memory aligned to a given size
1680        // and a given power-of-two-sized alignment (e.g. 1,2,4,8,...)
1681        // Memory is not zero filled.
1682
1683        void* alloc(size_t size, size_t alignment) {
1684            while (size > mData[mCurrentChunk].remainingCapacity(alignment)) {
1685                if (mCurrentChunk + 1 < mData.size()) {
1686                    mCurrentChunk++;
1687                } else {
1688                    size_t allocSize = CHUNK_SIZE;
1689                    if (allocSize < size + alignment - 1) {
1690                        allocSize = size + alignment - 1;
1691                    }
1692                    Chunk chunk(allocSize);
1693                    mData.push_back(chunk);
1694                    mCurrentChunk++;
1695                }
1696            }
1697            return mData[mCurrentChunk].allocate(size, alignment);
1698        }
1699
1700        static const size_t CHUNK_SIZE = 128*1024;
1701        // Note: this class does not deallocate its
1702        // memory when it's destroyed. It depends upon
1703        // its parent to deallocate the memory.
1704        struct Chunk {
1705            Chunk() {
1706                mpData = 0;
1707                mSize = 0;
1708                mOffset = 0;
1709            }
1710
1711            Chunk(size_t size) {
1712                mSize = size;
1713                mpData = (char*) malloc(size);
1714                mOffset = 0;
1715            }
1716
1717            ~Chunk() {
1718                // Doesn't deallocate memory.
1719            }
1720
1721            void* allocate(size_t size, size_t alignment) {
1722                size_t alignedOffset = aligned(mOffset, alignment);
1723                void* result = mpData + alignedOffset;
1724                mOffset = alignedOffset + size;
1725                return result;
1726            }
1727
1728            void free() {
1729                if (mpData) {
1730                    ::free(mpData);
1731                    mpData = 0;
1732                }
1733            }
1734
1735            size_t remainingCapacity(size_t alignment) {
1736                return aligned(mSize, alignment) - aligned(mOffset, alignment);
1737            }
1738
1739            // Assume alignment is a power of two
1740            inline size_t aligned(size_t v, size_t alignment) {
1741                size_t mask = alignment-1;
1742                return (v + mask) & ~mask;
1743            }
1744
1745            char* mpData;
1746            size_t mSize;
1747            size_t mOffset;
1748        };
1749
1750        size_t mCurrentChunk;
1751
1752        Vector<Chunk> mData;
1753    };
1754
1755    struct VariableInfo;
1756
1757    struct Token {
1758        int hash;
1759        size_t length;
1760        char* pText;
1761        tokenid_t id;
1762
1763        // Current values for the token
1764        char* mpMacroDefinition;
1765        VariableInfo* mpVariableInfo;
1766    };
1767
1768    class TokenTable {
1769    public:
1770        // Don't use 0..0xff, allows characters and operators to be tokens too.
1771
1772        static const int TOKEN_BASE = 0x100;
1773        TokenTable() {
1774            mpMap = hashmapCreate(128, hashFn, equalsFn);
1775        }
1776
1777        ~TokenTable() {
1778            hashmapFree(mpMap);
1779        }
1780
1781        void setArena(Arena* pArena) {
1782            mpArena = pArena;
1783        }
1784
1785        // Returns a token for a given string of characters.
1786        tokenid_t intern(const char* pText, size_t length) {
1787            Token probe;
1788            int hash = hashmapHash((void*) pText, length);
1789            {
1790                Token probe;
1791                probe.hash = hash;
1792                probe.length = length;
1793                probe.pText = (char*) pText;
1794                Token* pValue = (Token*) hashmapGet(mpMap, &probe);
1795                if (pValue) {
1796                    return pValue->id;
1797                }
1798            }
1799
1800            Token* pToken = (Token*) mpArena->alloc(sizeof(Token));
1801            memset(pToken, 0, sizeof(*pToken));
1802            pToken->hash = hash;
1803            pToken->length = length;
1804            pToken->pText = (char*) mpArena->alloc(length + 1);
1805            memcpy(pToken->pText, pText, length);
1806            pToken->pText[length] = 0;
1807            pToken->id = mTokens.size() + TOKEN_BASE;
1808            mTokens.push_back(pToken);
1809            hashmapPut(mpMap, pToken, pToken);
1810            return pToken->id;
1811        }
1812
1813        // Return the Token for a given tokenid.
1814        Token& operator[](tokenid_t id) {
1815            return *mTokens[id - TOKEN_BASE];
1816        }
1817
1818        inline size_t size() {
1819            return mTokens.size();
1820        }
1821
1822    private:
1823
1824        static int hashFn(void* pKey) {
1825            Token* pToken = (Token*) pKey;
1826            return pToken->hash;
1827        }
1828
1829        static bool equalsFn(void* keyA, void* keyB) {
1830            Token* pTokenA = (Token*) keyA;
1831            Token* pTokenB = (Token*) keyB;
1832            // Don't need to compare hash values, they should always be equal
1833            return pTokenA->length == pTokenB->length
1834                && strcmp(pTokenA->pText, pTokenB->pText) == 0;
1835        }
1836
1837        Hashmap* mpMap;
1838        Vector<Token*> mTokens;
1839        Arena* mpArena;
1840    };
1841
1842    class InputStream {
1843    public:
1844        int getChar() {
1845            if (bumpLine) {
1846                line++;
1847                bumpLine = false;
1848            }
1849            int ch = get();
1850            if (ch == '\n') {
1851                bumpLine = true;
1852            }
1853            return ch;
1854        }
1855        int getLine() {
1856            return line;
1857        }
1858    protected:
1859        InputStream() :
1860            line(1), bumpLine(false) {
1861        }
1862    private:
1863        virtual int get() = 0;
1864        int line;
1865        bool bumpLine;
1866    };
1867
1868    class FileInputStream : public InputStream {
1869    public:
1870        FileInputStream(FILE* in) : f(in) {}
1871    private:
1872        virtual int get() { return fgetc(f); }
1873        FILE* f;
1874    };
1875
1876    class TextInputStream : public InputStream {
1877    public:
1878        TextInputStream(const char* text, size_t textLength)
1879            : pText(text), mTextLength(textLength), mPosition(0) {
1880        }
1881
1882    private:
1883        virtual int get() {
1884            return mPosition < mTextLength ? pText[mPosition++] : EOF;
1885        }
1886
1887        const char* pText;
1888        size_t mTextLength;
1889        size_t mPosition;
1890    };
1891
1892    class String {
1893    public:
1894        String() {
1895            mpBase = 0;
1896            mUsed = 0;
1897            mSize = 0;
1898        }
1899
1900        String(const char* item, int len, bool adopt) {
1901            if (len < 0) {
1902                len = strlen(item);
1903            }
1904            if (adopt) {
1905                mpBase = (char*) item;
1906                mUsed = len;
1907                mSize = len + 1;
1908            } else {
1909                mpBase = 0;
1910                mUsed = 0;
1911                mSize = 0;
1912                appendBytes(item, len);
1913            }
1914        }
1915
1916        String(const String& other) {
1917            mpBase = 0;
1918            mUsed = 0;
1919            mSize = 0;
1920            appendBytes(other.getUnwrapped(), other.len());
1921        }
1922
1923        ~String() {
1924            if (mpBase) {
1925                free(mpBase);
1926            }
1927        }
1928
1929        String& operator=(const String& other) {
1930            clear();
1931            appendBytes(other.getUnwrapped(), other.len());
1932            return *this;
1933        }
1934
1935        inline char* getUnwrapped() const {
1936            return mpBase;
1937        }
1938
1939        void clear() {
1940            mUsed = 0;
1941            if (mSize > 0) {
1942                mpBase[0] = 0;
1943            }
1944        }
1945
1946        void appendCStr(const char* s) {
1947            appendBytes(s, strlen(s));
1948        }
1949
1950        void appendBytes(const char* s, int n) {
1951            memcpy(ensure(n), s, n + 1);
1952        }
1953
1954        void append(char c) {
1955            * ensure(1) = c;
1956        }
1957
1958        void append(String& other) {
1959            appendBytes(other.getUnwrapped(), other.len());
1960        }
1961
1962        char* orphan() {
1963            char* result = mpBase;
1964            mpBase = 0;
1965            mUsed = 0;
1966            mSize = 0;
1967            return result;
1968        }
1969
1970        void printf(const char* fmt,...) {
1971            va_list ap;
1972            va_start(ap, fmt);
1973            vprintf(fmt, ap);
1974            va_end(ap);
1975        }
1976
1977        void vprintf(const char* fmt, va_list ap) {
1978            char* temp;
1979            int numChars = vasprintf(&temp, fmt, ap);
1980            memcpy(ensure(numChars), temp, numChars+1);
1981            free(temp);
1982        }
1983
1984        inline size_t len() const {
1985            return mUsed;
1986        }
1987
1988    private:
1989        char* ensure(int n) {
1990            size_t newUsed = mUsed + n;
1991            if (newUsed > mSize) {
1992                size_t newSize = mSize * 2 + 10;
1993                if (newSize < newUsed) {
1994                    newSize = newUsed;
1995                }
1996                mpBase = (char*) realloc(mpBase, newSize + 1);
1997                mSize = newSize;
1998            }
1999            mpBase[newUsed] = '\0';
2000            char* result = mpBase + mUsed;
2001            mUsed = newUsed;
2002            return result;
2003        }
2004
2005        char* mpBase;
2006        size_t mUsed;
2007        size_t mSize;
2008    };
2009
2010    void internKeywords() {
2011        // Note: order has to match TOK_ constants
2012        static const char* keywords[] = {
2013            "int",
2014            "char",
2015            "void",
2016            "if",
2017            "else",
2018            "while",
2019            "break",
2020            "return",
2021            "for",
2022            "pragma",
2023            "define",
2024            "auto",
2025            "case",
2026            "const",
2027            "continue",
2028            "default",
2029            "do",
2030            "double",
2031            "enum",
2032            "extern",
2033            "float",
2034            "goto",
2035            "long",
2036            "register",
2037            "short",
2038            "signed",
2039            "sizeof",
2040            "static",
2041            "struct",
2042            "switch",
2043            "typedef",
2044            "union",
2045            "unsigned",
2046            "volatile",
2047            "_Bool",
2048            "_Complex",
2049            "_Imaginary",
2050            "inline",
2051            "restrict",
2052            0};
2053
2054        for(int i = 0; keywords[i]; i++) {
2055            mTokenTable.intern(keywords[i], strlen(keywords[i]));
2056        }
2057    }
2058
2059    struct InputState {
2060        InputStream* pStream;
2061        int oldCh;
2062    };
2063
2064    struct VariableInfo {
2065        void* pAddress;
2066        void* pForward; // For a forward direction, linked list of data to fix up
2067        tokenid_t tok;
2068        size_t level;
2069        VariableInfo* pOldDefinition;
2070        Type* pType;
2071    };
2072
2073    class SymbolStack {
2074    public:
2075        SymbolStack() {
2076            mpArena = 0;
2077            mpTokenTable = 0;
2078        }
2079
2080        void setArena(Arena* pArena) {
2081            mpArena = pArena;
2082        }
2083
2084        void setTokenTable(TokenTable* pTokenTable) {
2085            mpTokenTable = pTokenTable;
2086        }
2087
2088        void pushLevel() {
2089            Mark mark;
2090            mark.mArenaMark = mpArena->mark();
2091            mark.mSymbolHead = mStack.size();
2092            mLevelStack.push_back(mark);
2093        }
2094
2095        void popLevel() {
2096            // Undo any shadowing that was done:
2097            Mark mark = mLevelStack.back();
2098            mLevelStack.pop_back();
2099            while (mStack.size() > mark.mSymbolHead) {
2100                VariableInfo* pV = mStack.back();
2101                mStack.pop_back();
2102                (*mpTokenTable)[pV->tok].mpVariableInfo = pV->pOldDefinition;
2103            }
2104            mpArena->freeToMark(mark.mArenaMark);
2105        }
2106
2107        bool isDefinedAtCurrentLevel(tokenid_t tok) {
2108            VariableInfo* pV = (*mpTokenTable)[tok].mpVariableInfo;
2109            return pV && pV->level == level();
2110        }
2111
2112        VariableInfo* add(tokenid_t tok) {
2113            Token& token = (*mpTokenTable)[tok];
2114            VariableInfo* pOldV = token.mpVariableInfo;
2115            VariableInfo* pNewV =
2116                (VariableInfo*) mpArena->alloc(sizeof(VariableInfo));
2117            memset(pNewV, 0, sizeof(VariableInfo));
2118            pNewV->tok = tok;
2119            pNewV->level = level();
2120            pNewV->pOldDefinition = pOldV;
2121            token.mpVariableInfo = pNewV;
2122            mStack.push_back(pNewV);
2123            return pNewV;
2124        }
2125
2126        VariableInfo* add(Type* pType) {
2127            VariableInfo* pVI = add(pType->id);
2128            pVI->pType = pType;
2129            return pVI;
2130        }
2131
2132        void forEach(bool (*fn)(VariableInfo*, void*), void* context) {
2133            for (size_t i = 0; i < mStack.size(); i++) {
2134                if (! fn(mStack[i], context)) {
2135                    break;
2136                }
2137            }
2138        }
2139
2140    private:
2141        inline size_t level() {
2142            return mLevelStack.size();
2143        }
2144
2145        struct Mark {
2146            Arena::Mark mArenaMark;
2147            size_t mSymbolHead;
2148        };
2149
2150        Arena* mpArena;
2151        TokenTable* mpTokenTable;
2152        Vector<VariableInfo*> mStack;
2153        Vector<Mark> mLevelStack;
2154    };
2155
2156    int ch; // Current input character, or EOF
2157    tokenid_t tok;      // token
2158    intptr_t tokc;    // token extra info
2159    double tokd;     // floating point constant value
2160    int tokl;         // token operator level
2161    intptr_t rsym; // return symbol
2162    Type* pReturnType; // type of the current function's return.
2163    intptr_t loc; // local variable index
2164    char* glo;  // global variable index
2165    String mTokenString;
2166    char* dptr; // Macro state: Points to macro text during macro playback.
2167    int dch;    // Macro state: Saves old value of ch during a macro playback.
2168    char* pGlobalBase;
2169
2170    // Arena for the duration of the compile
2171    Arena mGlobalArena;
2172    // Arena for data that's only needed when compiling a single function
2173    Arena mLocalArena;
2174
2175    TokenTable mTokenTable;
2176    SymbolStack mGlobals;
2177    SymbolStack mLocals;
2178
2179    // Prebuilt types, makes things slightly faster.
2180    Type* mkpInt;        // int
2181    Type* mkpChar;       // char
2182    Type* mkpVoid;       // void
2183    Type* mkpFloat;
2184    Type* mkpDouble;
2185    Type* mkpIntFn;
2186    Type* mkpIntPtr;
2187    Type* mkpCharPtr;
2188    Type* mkpPtrIntFn;
2189
2190    InputStream* file;
2191
2192    CodeBuf codeBuf;
2193    CodeGenerator* pGen;
2194
2195    String mErrorBuf;
2196
2197    String mPragmas;
2198    int mPragmaStringCount;
2199
2200    static const int ALLOC_SIZE = 99999;
2201
2202    static const int TOK_DUMMY = 1;
2203    static const int TOK_NUM = 2;
2204    static const int TOK_NUM_FLOAT = 3;
2205    static const int TOK_NUM_DOUBLE = 4;
2206
2207    // 3..255 are character and/or operators
2208
2209    // Keywords start at 0x100 and increase by 1
2210    // Order has to match string list in "internKeywords".
2211    enum {
2212        TOK_KEYWORD = TokenTable::TOKEN_BASE,
2213        TOK_INT = TOK_KEYWORD,
2214        TOK_CHAR,
2215        TOK_VOID,
2216        TOK_IF,
2217        TOK_ELSE,
2218        TOK_WHILE,
2219        TOK_BREAK,
2220        TOK_RETURN,
2221        TOK_FOR,
2222        TOK_PRAGMA,
2223        TOK_DEFINE,
2224        TOK_AUTO,
2225        TOK_CASE,
2226        TOK_CONST,
2227        TOK_CONTINUE,
2228        TOK_DEFAULT,
2229        TOK_DO,
2230        TOK_DOUBLE,
2231        TOK_ENUM,
2232        TOK_EXTERN,
2233        TOK_FLOAT,
2234        TOK_GOTO,
2235        TOK_LONG,
2236        TOK_REGISTER,
2237        TOK_SHORT,
2238        TOK_SIGNED,
2239        TOK_SIZEOF,
2240        TOK_STATIC,
2241        TOK_STRUCT,
2242        TOK_SWITCH,
2243        TOK_TYPEDEF,
2244        TOK_UNION,
2245        TOK_UNSIGNED,
2246        TOK_VOLATILE,
2247        TOK__BOOL,
2248        TOK__COMPLEX,
2249        TOK__IMAGINARY,
2250        TOK_INLINE,
2251        TOK_RESTRICT,
2252        // Symbols start after tokens
2253        TOK_SYMBOL
2254    };
2255
2256    static const int LOCAL = 0x200;
2257
2258    static const int SYM_FORWARD = 0;
2259    static const int SYM_DEFINE = 1;
2260
2261    /* tokens in string heap */
2262    static const int TAG_TOK = ' ';
2263
2264    static const int OP_INCREMENT = 0;
2265    static const int OP_DECREMENT = 1;
2266    static const int OP_MUL = 2;
2267    static const int OP_DIV = 3;
2268    static const int OP_MOD = 4;
2269    static const int OP_PLUS = 5;
2270    static const int OP_MINUS = 6;
2271    static const int OP_SHIFT_LEFT = 7;
2272    static const int OP_SHIFT_RIGHT = 8;
2273    static const int OP_LESS_EQUAL = 9;
2274    static const int OP_GREATER_EQUAL = 10;
2275    static const int OP_LESS = 11;
2276    static const int OP_GREATER = 12;
2277    static const int OP_EQUALS = 13;
2278    static const int OP_NOT_EQUALS = 14;
2279    static const int OP_LOGICAL_AND = 15;
2280    static const int OP_LOGICAL_OR = 16;
2281    static const int OP_BIT_AND = 17;
2282    static const int OP_BIT_XOR = 18;
2283    static const int OP_BIT_OR = 19;
2284    static const int OP_BIT_NOT = 20;
2285    static const int OP_LOGICAL_NOT = 21;
2286    static const int OP_COUNT = 22;
2287
2288    /* Operators are searched from front, the two-character operators appear
2289     * before the single-character operators with the same first character.
2290     * @ is used to pad out single-character operators.
2291     */
2292    static const char* operatorChars;
2293    static const char operatorLevel[];
2294
2295    /* Called when we detect an internal problem. Does nothing in production.
2296     *
2297     */
2298    void internalError() {
2299        * (char*) 0 = 0;
2300    }
2301
2302    void assert(bool isTrue) {
2303        if (!isTrue) {
2304            internalError();
2305        }
2306    }
2307
2308    bool isSymbol(tokenid_t t) {
2309        return t >= TOK_SYMBOL &&
2310            ((size_t) (t-TOK_SYMBOL)) < mTokenTable.size();
2311    }
2312
2313    bool isSymbolOrKeyword(tokenid_t t) {
2314        return t >= TOK_KEYWORD &&
2315            ((size_t) (t-TOK_KEYWORD)) < mTokenTable.size();
2316    }
2317
2318    VariableInfo* VI(tokenid_t t) {
2319        assert(isSymbol(t));
2320        VariableInfo* pV = mTokenTable[t].mpVariableInfo;
2321        if (pV && pV->tok != t) {
2322            internalError();
2323        }
2324        return pV;
2325    }
2326
2327    inline bool isDefined(tokenid_t t) {
2328        return t >= TOK_SYMBOL && VI(t) != 0;
2329    }
2330
2331    const char* nameof(tokenid_t t) {
2332        assert(isSymbolOrKeyword(t));
2333        return mTokenTable[t].pText;
2334    }
2335
2336    void pdef(int t) {
2337        mTokenString.append(t);
2338    }
2339
2340    void inp() {
2341        if (dptr) {
2342            ch = *dptr++;
2343            if (ch == 0) {
2344                dptr = 0;
2345                ch = dch;
2346            }
2347        } else
2348            ch = file->getChar();
2349#if 0
2350        printf("ch='%c' 0x%x\n", ch, ch);
2351#endif
2352    }
2353
2354    int isid() {
2355        return isalnum(ch) | (ch == '_');
2356    }
2357
2358    /* read a character constant, advances ch to after end of constant */
2359    int getq() {
2360        int val = ch;
2361        if (ch == '\\') {
2362            inp();
2363            if (isoctal(ch)) {
2364                // 1 to 3 octal characters.
2365                val = 0;
2366                for(int i = 0; i < 3; i++) {
2367                    if (isoctal(ch)) {
2368                        val = (val << 3) + ch - '0';
2369                        inp();
2370                    }
2371                }
2372                return val;
2373            } else if (ch == 'x' || ch == 'X') {
2374                // N hex chars
2375                inp();
2376                if (! isxdigit(ch)) {
2377                    error("'x' character escape requires at least one digit.");
2378                } else {
2379                    val = 0;
2380                    while (isxdigit(ch)) {
2381                        int d = ch;
2382                        if (isdigit(d)) {
2383                            d -= '0';
2384                        } else if (d <= 'F') {
2385                            d = d - 'A' + 10;
2386                        } else {
2387                            d = d - 'a' + 10;
2388                        }
2389                        val = (val << 4) + d;
2390                        inp();
2391                    }
2392                }
2393            } else {
2394                int val = ch;
2395                switch (ch) {
2396                    case 'a':
2397                        val = '\a';
2398                        break;
2399                    case 'b':
2400                        val = '\b';
2401                        break;
2402                    case 'f':
2403                        val = '\f';
2404                        break;
2405                    case 'n':
2406                        val = '\n';
2407                        break;
2408                    case 'r':
2409                        val = '\r';
2410                        break;
2411                    case 't':
2412                        val = '\t';
2413                        break;
2414                    case 'v':
2415                        val = '\v';
2416                        break;
2417                    case '\\':
2418                        val = '\\';
2419                        break;
2420                    case '\'':
2421                        val = '\'';
2422                        break;
2423                    case '"':
2424                        val = '"';
2425                        break;
2426                    case '?':
2427                        val = '?';
2428                        break;
2429                    default:
2430                        error("Undefined character escape %c", ch);
2431                        break;
2432                }
2433                inp();
2434                return val;
2435            }
2436        } else {
2437            inp();
2438        }
2439        return val;
2440    }
2441
2442    static bool isoctal(int ch) {
2443        return ch >= '0' && ch <= '7';
2444    }
2445
2446    bool acceptCh(int c) {
2447        bool result = c == ch;
2448        if (result) {
2449            pdef(ch);
2450            inp();
2451        }
2452        return result;
2453    }
2454
2455    bool acceptDigitsCh() {
2456        bool result = false;
2457        while (isdigit(ch)) {
2458            result = true;
2459            pdef(ch);
2460            inp();
2461        }
2462        return result;
2463    }
2464
2465    void parseFloat() {
2466        tok = TOK_NUM_DOUBLE;
2467        // mTokenString already has the integral part of the number.
2468        acceptCh('.');
2469        acceptDigitsCh();
2470        bool doExp = true;
2471        if (acceptCh('e') || acceptCh('E')) {
2472            // Don't need to do any extra work
2473        } else if (ch == 'f' || ch == 'F') {
2474            pdef('e'); // So it can be parsed by strtof.
2475            inp();
2476            tok = TOK_NUM_FLOAT;
2477        } else {
2478            doExp = false;
2479        }
2480        if (doExp) {
2481            bool digitsRequired = acceptCh('-');
2482            bool digitsFound = acceptDigitsCh();
2483            if (digitsRequired && ! digitsFound) {
2484                error("malformed exponent");
2485            }
2486        }
2487        char* pText = mTokenString.getUnwrapped();
2488        if (tok == TOK_NUM_FLOAT) {
2489            tokd = strtof(pText, 0);
2490        } else {
2491            tokd = strtod(pText, 0);
2492        }
2493        //fprintf(stderr, "float constant: %s (%d) %g\n", pText, tok, tokd);
2494    }
2495
2496    void next() {
2497        int l, a;
2498
2499        while (isspace(ch) | (ch == '#')) {
2500            if (ch == '#') {
2501                inp();
2502                next();
2503                if (tok == TOK_DEFINE) {
2504                    doDefine();
2505                } else if (tok == TOK_PRAGMA) {
2506                    doPragma();
2507                } else {
2508                    error("Unsupported preprocessor directive \"%s\"",
2509                          mTokenString.getUnwrapped());
2510                }
2511            }
2512            inp();
2513        }
2514        tokl = 0;
2515        tok = ch;
2516        /* encode identifiers & numbers */
2517        if (isid()) {
2518            mTokenString.clear();
2519            while (isid()) {
2520                pdef(ch);
2521                inp();
2522            }
2523            if (isdigit(tok)) {
2524                // Start of a numeric constant. Could be integer, float, or
2525                // double, won't know until we look further.
2526                if (ch == '.' || ch == 'e' || ch == 'e'
2527                    || ch == 'f' || ch == 'F')  {
2528                    parseFloat();
2529                } else {
2530                    // It's an integer constant
2531                    tokc = strtol(mTokenString.getUnwrapped(), 0, 0);
2532                    tok = TOK_NUM;
2533                }
2534            } else {
2535                tok = mTokenTable.intern(mTokenString.getUnwrapped(),
2536                                         mTokenString.len());
2537                // Is this a macro?
2538                char* pMacroDefinition = mTokenTable[tok].mpMacroDefinition;
2539                if(pMacroDefinition) {
2540                    // Yes, it is a macro
2541                    dptr = pMacroDefinition;
2542                    dch = ch;
2543                    inp();
2544                    next();
2545                }
2546            }
2547        } else {
2548            inp();
2549            if (tok == '\'') {
2550                tok = TOK_NUM;
2551                tokc = getq();
2552                if (ch != '\'') {
2553                    error("Expected a ' character, got %c", ch);
2554                } else {
2555                  inp();
2556                }
2557            } else if ((tok == '/') & (ch == '*')) {
2558                inp();
2559                while (ch && ch != EOF) {
2560                    while (ch != '*' && ch != EOF)
2561                        inp();
2562                    inp();
2563                    if (ch == '/')
2564                        ch = 0;
2565                }
2566                if (ch == EOF) {
2567                    error("End of file inside comment.");
2568                }
2569                inp();
2570                next();
2571            } else if ((tok == '/') & (ch == '/')) {
2572                inp();
2573                while (ch && (ch != '\n') && (ch != EOF)) {
2574                    inp();
2575                }
2576                inp();
2577                next();
2578            } else {
2579                const char* t = operatorChars;
2580                int opIndex = 0;
2581                while ((l = *t++) != 0) {
2582                    a = *t++;
2583                    tokl = operatorLevel[opIndex];
2584                    tokc = opIndex;
2585                    if ((l == tok) & ((a == ch) | (a == '@'))) {
2586#if 0
2587                        printf("%c%c -> tokl=%d tokc=0x%x\n",
2588                                l, a, tokl, tokc);
2589#endif
2590                        if (a == ch) {
2591                            inp();
2592                            tok = TOK_DUMMY; /* dummy token for double tokens */
2593                        }
2594                        break;
2595                    }
2596                    opIndex++;
2597                }
2598                if (l == 0) {
2599                    tokl = 0;
2600                    tokc = 0;
2601                }
2602            }
2603        }
2604#if 0
2605        {
2606            String buf;
2607            decodeToken(buf, tok);
2608            fprintf(stderr, "%s\n", buf.getUnwrapped());
2609        }
2610#endif
2611    }
2612
2613    void doDefine() {
2614        next();
2615        tokenid_t name = tok;
2616        String* pName = new String();
2617        while (isspace(ch)) {
2618            inp();
2619        }
2620        if (ch == '(') {
2621            delete pName;
2622            error("Defines with arguments not supported");
2623            return;
2624        }
2625        while (isspace(ch)) {
2626            inp();
2627        }
2628        String value;
2629        while (ch != '\n' && ch != EOF) {
2630            value.append(ch);
2631            inp();
2632        }
2633        char* pDefn = (char*)mGlobalArena.alloc(value.len() + 1);
2634        memcpy(pDefn, value.getUnwrapped(), value.len());
2635        pDefn[value.len()] = 0;
2636        mTokenTable[name].mpMacroDefinition = pDefn;
2637    }
2638
2639    void doPragma() {
2640        // # pragma name(val)
2641        int state = 0;
2642        while(ch != EOF && ch != '\n' && state < 10) {
2643            switch(state) {
2644                case 0:
2645                    if (isspace(ch)) {
2646                        inp();
2647                    } else {
2648                        state++;
2649                    }
2650                    break;
2651                case 1:
2652                    if (isalnum(ch)) {
2653                        mPragmas.append(ch);
2654                        inp();
2655                    } else if (ch == '(') {
2656                        mPragmas.append(0);
2657                        inp();
2658                        state++;
2659                    } else {
2660                        state = 11;
2661                    }
2662                    break;
2663                case 2:
2664                    if (isalnum(ch)) {
2665                        mPragmas.append(ch);
2666                        inp();
2667                    } else if (ch == ')') {
2668                        mPragmas.append(0);
2669                        inp();
2670                        state = 10;
2671                    } else {
2672                        state = 11;
2673                    }
2674                    break;
2675            }
2676        }
2677        if(state != 10) {
2678            error("Unexpected pragma syntax");
2679        }
2680        mPragmaStringCount += 2;
2681    }
2682
2683    virtual void verror(const char* fmt, va_list ap) {
2684        mErrorBuf.printf("%ld: ", file->getLine());
2685        mErrorBuf.vprintf(fmt, ap);
2686        mErrorBuf.printf("\n");
2687    }
2688
2689    void skip(intptr_t c) {
2690        if (tok != c) {
2691            error("'%c' expected", c);
2692        }
2693        next();
2694    }
2695
2696    bool accept(intptr_t c) {
2697        if (tok == c) {
2698            next();
2699            return true;
2700        }
2701        return false;
2702    }
2703
2704    bool acceptStringLiteral() {
2705        if (tok == '"') {
2706            pGen->li((int) glo, mkpCharPtr);
2707            // This while loop merges multiple adjacent string constants.
2708            while (tok == '"') {
2709                while (ch != '"' && ch != EOF) {
2710                    *allocGlobalSpace(1) = getq();
2711                }
2712                if (ch != '"') {
2713                    error("Unterminated string constant.");
2714                }
2715                inp();
2716                next();
2717            }
2718            /* Null terminate */
2719            *glo = 0;
2720            /* align heap */
2721            allocGlobalSpace((char*) (((intptr_t) glo + 4) & -4) - glo);
2722
2723            return true;
2724        }
2725        return false;
2726    }
2727    /* Parse and evaluate a unary expression.
2728     * allowAssignment is true if '=' parsing wanted (quick hack)
2729     */
2730    void unary(bool allowAssignment) {
2731        intptr_t n, t, a;
2732        t = 0;
2733        n = 1; /* type of expression 0 = forward, 1 = value, other = lvalue */
2734        if (acceptStringLiteral()) {
2735            // Nothing else to do.
2736        } else {
2737            int c = tokl;
2738            a = tokc;
2739            double ad = tokd;
2740            t = tok;
2741            next();
2742            if (t == TOK_NUM) {
2743                pGen->li(a, mkpInt);
2744            } else if (t == TOK_NUM_FLOAT) {
2745                pGen->lif(ad, mkpFloat);
2746            } else if (t == TOK_NUM_DOUBLE) {
2747                pGen->lid(ad, mkpDouble);
2748            } else if (c == 2) {
2749                /* -, +, !, ~ */
2750                unary(false);
2751                if (t == '!')
2752                    pGen->gUnaryCmp(a);
2753                else
2754                    pGen->genUnaryOp(a);
2755            } else if (t == '(') {
2756                expr();
2757                skip(')');
2758            } else if (t == '*') {
2759                /* This is a pointer dereference, but we currently only
2760                 * support a pointer dereference if it's immediately
2761                 * in front of a cast. So parse the cast right here.
2762                 */
2763                skip('(');
2764                Type* pCast = expectCastTypeDeclaration(mLocalArena);
2765                // We currently only handle 3 types of cast:
2766                // (int*), (char*) , (int (*)())
2767                if(typeEqual(pCast, mkpIntPtr)) {
2768                    t = TOK_INT;
2769                } else if (typeEqual(pCast, mkpCharPtr)) {
2770                    t = TOK_CHAR;
2771                } else if (typeEqual(pCast, mkpPtrIntFn)){
2772                    t = 0;
2773                } else {
2774                    String buffer;
2775                    decodeType(buffer, pCast);
2776                    error("Unsupported cast type %s", buffer.getUnwrapped());
2777                    decodeType(buffer, mkpPtrIntFn);
2778                }
2779                skip(')');
2780                unary(false);
2781                if (accept('=')) {
2782                    pGen->pushR0();
2783                    expr();
2784                    pGen->storeR0ToTOS(pCast);
2785                } else if (t) {
2786                    pGen->loadR0FromR0(pCast);
2787                }
2788                // Else we fall through to the function call below, with
2789                // t == 0 to trigger an indirect function call. Hack!
2790            } else if (t == '&') {
2791                VariableInfo* pVI = VI(tok);
2792                pGen->leaR0((int) pVI->pAddress,
2793                            createPtrType(pVI->pType, mLocalArena));
2794                next();
2795            } else if (t == EOF ) {
2796                error("Unexpected EOF.");
2797            } else if (!checkSymbol(t)) {
2798                // Don't have to do anything special here, the error
2799                // message was printed by checkSymbol() above.
2800            } else {
2801                if (!isDefined(t)) {
2802                    mGlobals.add(t);
2803                    // printf("Adding new global function %s\n", nameof(t));
2804                }
2805                VariableInfo* pVI = VI(t);
2806                n = (intptr_t) pVI->pAddress;
2807                /* forward reference: try dlsym */
2808                if (!n) {
2809                    n = (intptr_t) dlsym(RTLD_DEFAULT, nameof(t));
2810                    pVI->pType = mkpIntFn;
2811                    pVI->pAddress = (void*) n;
2812                }
2813                if ((tok == '=') & allowAssignment) {
2814                    /* assignment */
2815                    next();
2816                    expr();
2817                    pGen->storeR0(n);
2818                } else if (tok != '(') {
2819                    /* variable */
2820                    if (!n) {
2821                        error("Undefined variable %s", nameof(t));
2822                    }
2823                    pGen->loadR0(n, tokl == 11, tokc, pVI->pType);
2824                    if (tokl == 11) {
2825                        next();
2826                    }
2827                }
2828            }
2829        }
2830
2831        /* function call */
2832        if (accept('(')) {
2833            if (n == 1)
2834                pGen->pushR0();
2835
2836            /* push args and invert order */
2837            a = pGen->beginFunctionCallArguments();
2838            int l = 0;
2839            while (tok != ')' && tok != EOF) {
2840                expr();
2841                pGen->storeR0ToArg(l);
2842                l = l + 4;
2843                if (accept(',')) {
2844                    // fine
2845                } else if ( tok != ')') {
2846                    error("Expected ',' or ')'");
2847                }
2848            }
2849            pGen->endFunctionCallArguments(a, l);
2850            skip(')');
2851            if (!n) {
2852                /* forward reference */
2853                VariableInfo* pVI = VI(t);
2854                pVI->pForward = (void*) pGen->callForward((int) pVI->pForward,
2855                                                          pVI->pType);
2856            } else if (n == 1) {
2857                pGen->callIndirect(l, mkpPtrIntFn->pHead);
2858            } else {
2859                pGen->callRelative(n - codeBuf.getPC() - pGen->jumpOffset(),
2860                                   VI(t)->pType);
2861            }
2862            pGen->adjustStackAfterCall(l, n == 1);
2863        }
2864    }
2865
2866    /* Recursive descent parser for binary operations.
2867     */
2868    void binaryOp(int level) {
2869        intptr_t t, n, a;
2870        t = 0;
2871        if (level-- == 1)
2872            unary(true);
2873        else {
2874            binaryOp(level);
2875            a = 0;
2876            while (level == tokl) {
2877                n = tok;
2878                t = tokc;
2879                next();
2880
2881                if (level > 8) {
2882                    a = pGen->gtst(t == OP_LOGICAL_OR, a); /* && and || output code generation */
2883                    binaryOp(level);
2884                } else {
2885                    pGen->pushR0();
2886                    binaryOp(level);
2887
2888                    if ((level == 4) | (level == 5)) {
2889                        pGen->gcmp(t);
2890                    } else {
2891                        pGen->genOp(t);
2892                    }
2893                }
2894            }
2895            /* && and || output code generation */
2896            if (a && level > 8) {
2897                a = pGen->gtst(t == OP_LOGICAL_OR, a);
2898                pGen->li(t != OP_LOGICAL_OR, mkpInt);
2899                pGen->gjmp(5); /* jmp $ + 5 (sizeof li, FIXME for ARM) */
2900                pGen->gsym(a);
2901                pGen->li(t == OP_LOGICAL_OR, mkpInt);
2902            }
2903        }
2904    }
2905
2906    void expr() {
2907        binaryOp(11);
2908    }
2909
2910    int test_expr() {
2911        expr();
2912        return pGen->gtst(0, 0);
2913    }
2914
2915    void block(intptr_t l, bool outermostFunctionBlock) {
2916        intptr_t a, n, t;
2917
2918        Type* pBaseType;
2919        if ((pBaseType = acceptPrimitiveType(mLocalArena))) {
2920            /* declarations */
2921            localDeclarations(pBaseType);
2922        } else if (tok == TOK_IF) {
2923            next();
2924            skip('(');
2925            a = test_expr();
2926            skip(')');
2927            block(l, false);
2928            if (tok == TOK_ELSE) {
2929                next();
2930                n = pGen->gjmp(0); /* jmp */
2931                pGen->gsym(a);
2932                block(l, false);
2933                pGen->gsym(n); /* patch else jmp */
2934            } else {
2935                pGen->gsym(a); /* patch if test */
2936            }
2937        } else if ((tok == TOK_WHILE) | (tok == TOK_FOR)) {
2938            t = tok;
2939            next();
2940            skip('(');
2941            if (t == TOK_WHILE) {
2942                n = codeBuf.getPC(); // top of loop, target of "next" iteration
2943                a = test_expr();
2944            } else {
2945                if (tok != ';')
2946                    expr();
2947                skip(';');
2948                n = codeBuf.getPC();
2949                a = 0;
2950                if (tok != ';')
2951                    a = test_expr();
2952                skip(';');
2953                if (tok != ')') {
2954                    t = pGen->gjmp(0);
2955                    expr();
2956                    pGen->gjmp(n - codeBuf.getPC() - pGen->jumpOffset());
2957                    pGen->gsym(t);
2958                    n = t + 4;
2959                }
2960            }
2961            skip(')');
2962            block((intptr_t) &a, false);
2963            pGen->gjmp(n - codeBuf.getPC() - pGen->jumpOffset()); /* jmp */
2964            pGen->gsym(a);
2965        } else if (tok == '{') {
2966            if (! outermostFunctionBlock) {
2967                mLocals.pushLevel();
2968            }
2969            next();
2970            while (tok != '}' && tok != EOF)
2971                block(l, false);
2972            skip('}');
2973            if (! outermostFunctionBlock) {
2974                mLocals.popLevel();
2975            }
2976        } else {
2977            if (accept(TOK_RETURN)) {
2978                if (tok != ';') {
2979                    expr();
2980                    pGen->convertR0(pReturnType);
2981                }
2982                rsym = pGen->gjmp(rsym); /* jmp */
2983            } else if (accept(TOK_BREAK)) {
2984                *(int *) l = pGen->gjmp(*(int *) l);
2985            } else if (tok != ';')
2986                expr();
2987            skip(';');
2988        }
2989    }
2990
2991    bool typeEqual(Type* a, Type* b) {
2992        if (a == b) {
2993            return true;
2994        }
2995        if (a == NULL || b == NULL) {
2996            return false;
2997        }
2998        TypeTag at = a->tag;
2999        if (at != b->tag) {
3000            return false;
3001        }
3002        if (at == TY_POINTER) {
3003            return typeEqual(a->pHead, b->pHead);
3004        } else if (at == TY_FUNC || at == TY_PARAM) {
3005            return typeEqual(a->pHead, b->pHead)
3006                && typeEqual(a->pTail, b->pTail);
3007        }
3008        return true;
3009    }
3010
3011    Type* createType(TypeTag tag, Type* pHead, Type* pTail, Arena& arena) {
3012        assert(tag >= TY_INT && tag <= TY_PARAM);
3013        Type* pType = (Type*) arena.alloc(sizeof(Type));
3014        memset(pType, 0, sizeof(*pType));
3015        pType->tag = tag;
3016        pType->pHead = pHead;
3017        pType->pTail = pTail;
3018        return pType;
3019    }
3020
3021    Type* createPtrType(Type* pType, Arena& arena) {
3022        return createType(TY_POINTER, pType, NULL, arena);
3023    }
3024
3025    /**
3026     * Try to print a type in declaration order
3027     */
3028    void decodeType(String& buffer, Type* pType) {
3029        buffer.clear();
3030        if (pType == NULL) {
3031            buffer.appendCStr("null");
3032            return;
3033        }
3034        decodeTypeImp(buffer, pType);
3035    }
3036
3037    void decodeTypeImp(String& buffer, Type* pType) {
3038        decodeTypeImpPrefix(buffer, pType);
3039
3040        String temp;
3041        if (pType->id != 0) {
3042            decodeToken(temp, pType->id);
3043            buffer.append(temp);
3044        }
3045
3046        decodeTypeImpPostfix(buffer, pType);
3047    }
3048
3049    void decodeTypeImpPrefix(String& buffer, Type* pType) {
3050        TypeTag tag = pType->tag;
3051
3052        if (tag >= TY_INT && tag <= TY_VOID) {
3053            switch (tag) {
3054                case TY_INT:
3055                    buffer.appendCStr("int");
3056                    break;
3057                case TY_CHAR:
3058                    buffer.appendCStr("char");
3059                    break;
3060                case TY_VOID:
3061                    buffer.appendCStr("void");
3062                    break;
3063                case TY_FLOAT:
3064                    buffer.appendCStr("float");
3065                    break;
3066                case TY_DOUBLE:
3067                    buffer.appendCStr("double");
3068                    break;
3069                default:
3070                    break;
3071            }
3072            buffer.append(' ');
3073        }
3074
3075        switch (tag) {
3076            case TY_INT:
3077                break;
3078            case TY_CHAR:
3079                break;
3080            case TY_VOID:
3081                 break;
3082            case TY_FLOAT:
3083                 break;
3084            case TY_DOUBLE:
3085                break;
3086            case TY_POINTER:
3087                decodeTypeImpPrefix(buffer, pType->pHead);
3088                if(pType->pHead && pType->pHead->tag == TY_FUNC) {
3089                    buffer.append('(');
3090                }
3091                buffer.append('*');
3092                break;
3093            case TY_FUNC:
3094                decodeTypeImp(buffer, pType->pHead);
3095                break;
3096            case TY_PARAM:
3097                decodeTypeImp(buffer, pType->pHead);
3098                break;
3099            default:
3100                String temp;
3101                temp.printf("Unknown tag %d", pType->tag);
3102                buffer.append(temp);
3103                break;
3104        }
3105    }
3106
3107    void decodeTypeImpPostfix(String& buffer, Type* pType) {
3108        TypeTag tag = pType->tag;
3109
3110        switch(tag) {
3111            case TY_POINTER:
3112                if(pType->pHead && pType->pHead->tag == TY_FUNC) {
3113                    buffer.append(')');
3114                }
3115                decodeTypeImpPostfix(buffer, pType->pHead);
3116                break;
3117            case TY_FUNC:
3118                buffer.append('(');
3119                for(Type* pArg = pType->pTail; pArg; pArg = pArg->pTail) {
3120                    decodeTypeImp(buffer, pArg);
3121                    if (pArg->pTail) {
3122                        buffer.appendCStr(", ");
3123                    }
3124                }
3125                buffer.append(')');
3126                break;
3127            default:
3128                break;
3129        }
3130    }
3131
3132    void printType(Type* pType) {
3133        String buffer;
3134        decodeType(buffer, pType);
3135        fprintf(stderr, "%s\n", buffer.getUnwrapped());
3136    }
3137
3138    Type* acceptPrimitiveType(Arena& arena) {
3139        Type* pType;
3140        if (tok == TOK_INT) {
3141            pType = mkpInt;
3142        } else if (tok == TOK_CHAR) {
3143            pType = mkpChar;
3144        } else if (tok == TOK_VOID) {
3145            pType = mkpVoid;
3146        } else if (tok == TOK_FLOAT) {
3147            pType = mkpFloat;
3148        } else if (tok == TOK_DOUBLE) {
3149            pType = mkpDouble;
3150        } else {
3151            return NULL;
3152        }
3153        next();
3154        return pType;
3155    }
3156
3157    Type* acceptDeclaration(Type* pType, bool nameAllowed, bool nameRequired,
3158                            Arena& arena) {
3159        tokenid_t declName = 0;
3160        pType = acceptDecl2(pType, declName, nameAllowed,
3161                                  nameRequired, arena);
3162        if (declName) {
3163            // Clone the parent type so we can set a unique ID
3164            pType = createType(pType->tag, pType->pHead,
3165                                      pType->pTail, arena);
3166
3167            pType->id = declName;
3168        }
3169        // fprintf(stderr, "Parsed a declaration:       ");
3170        // printType(pType);
3171        return pType;
3172    }
3173
3174    Type* expectDeclaration(Type* pBaseType, Arena& arena) {
3175        Type* pType = acceptDeclaration(pBaseType, true, true, arena);
3176        if (! pType) {
3177            error("Expected a declaration");
3178        }
3179        return pType;
3180    }
3181
3182    /* Used for accepting types that appear in casts */
3183    Type* acceptCastTypeDeclaration(Arena& arena) {
3184        Type* pType = acceptPrimitiveType(arena);
3185        if (pType) {
3186            pType = acceptDeclaration(pType, false, false, arena);
3187        }
3188        return pType;
3189    }
3190
3191    Type* expectCastTypeDeclaration(Arena& arena) {
3192        Type* pType = acceptCastTypeDeclaration(arena);
3193        if (! pType) {
3194            error("Expected a declaration");
3195        }
3196        return pType;
3197    }
3198
3199    Type* acceptDecl2(Type* pType, tokenid_t& declName,
3200                      bool nameAllowed, bool nameRequired, Arena& arena) {
3201        int ptrCounter = 0;
3202        while (accept('*')) {
3203            ptrCounter++;
3204        }
3205        pType = acceptDecl3(pType, declName, nameAllowed, nameRequired, arena);
3206        while (ptrCounter-- > 0) {
3207            pType = createType(TY_POINTER, pType, NULL, arena);
3208        }
3209        return pType;
3210    }
3211
3212    Type* acceptDecl3(Type* pType, tokenid_t& declName,
3213                      bool nameAllowed, bool nameRequired, Arena& arena) {
3214        // direct-dcl :
3215        //   name
3216        //  (dcl)
3217        //   direct-dcl()
3218        //   direct-dcl[]
3219        Type* pNewHead = NULL;
3220        if (accept('(')) {
3221            pNewHead = acceptDecl2(pNewHead, declName, nameAllowed,
3222                                nameRequired, arena);
3223            skip(')');
3224        } else if ((declName = acceptSymbol()) != 0) {
3225            if (nameAllowed == false && declName) {
3226                error("Symbol %s not allowed here", nameof(declName));
3227            } else if (nameRequired && ! declName) {
3228                String temp;
3229                decodeToken(temp, tok);
3230                error("Expected symbol. Got %s", temp.getUnwrapped());
3231            }
3232        }
3233        while (accept('(')) {
3234            // Function declaration
3235            Type* pTail = acceptArgs(nameAllowed, arena);
3236            pType = createType(TY_FUNC, pType, pTail, arena);
3237            skip(')');
3238        }
3239
3240        if (pNewHead) {
3241            Type* pA = pNewHead;
3242            while (pA->pHead) {
3243                pA = pA->pHead;
3244            }
3245            pA->pHead = pType;
3246            pType = pNewHead;
3247        }
3248        return pType;
3249    }
3250
3251    Type* acceptArgs(bool nameAllowed, Arena& arena) {
3252        Type* pHead = NULL;
3253        Type* pTail = NULL;
3254        for(;;) {
3255            Type* pBaseArg = acceptPrimitiveType(arena);
3256            if (pBaseArg) {
3257                Type* pArg = acceptDeclaration(pBaseArg, nameAllowed, false,
3258                                               arena);
3259                if (pArg) {
3260                    Type* pParam = createType(TY_PARAM, pArg, NULL, arena);
3261                    if (!pHead) {
3262                        pHead = pParam;
3263                        pTail = pParam;
3264                    } else {
3265                        pTail->pTail = pParam;
3266                        pTail = pParam;
3267                    }
3268                }
3269            }
3270            if (! accept(',')) {
3271                break;
3272            }
3273        }
3274        return pHead;
3275    }
3276
3277    Type* expectPrimitiveType(Arena& arena) {
3278        Type* pType = acceptPrimitiveType(arena);
3279        if (!pType) {
3280            String buf;
3281            decodeToken(buf, tok);
3282            error("Expected a type, got %s", buf.getUnwrapped());
3283        }
3284        return pType;
3285    }
3286
3287    void addGlobalSymbol(Type* pDecl) {
3288        tokenid_t t = pDecl->id;
3289        VariableInfo* pVI = VI(t);
3290        if(pVI && pVI->pAddress) {
3291            reportDuplicate(t);
3292        }
3293        mGlobals.add(pDecl);
3294    }
3295
3296    void reportDuplicate(tokenid_t t) {
3297        error("Duplicate definition of %s", nameof(t));
3298    }
3299
3300    void addLocalSymbol(Type* pDecl) {
3301        tokenid_t t = pDecl->id;
3302        if (mLocals.isDefinedAtCurrentLevel(t)) {
3303            reportDuplicate(t);
3304        }
3305        mLocals.add(pDecl);
3306    }
3307
3308    void localDeclarations(Type* pBaseType) {
3309        intptr_t a;
3310
3311        while (pBaseType) {
3312            while (tok != ';' && tok != EOF) {
3313                Type* pDecl = expectDeclaration(pBaseType, mLocalArena);
3314                if (!pDecl) {
3315                    break;
3316                }
3317                int variableAddress = 0;
3318                addLocalSymbol(pDecl);
3319                loc = loc + pGen->sizeOf(pDecl);
3320                loc = loc + 4;
3321                variableAddress = -loc;
3322                VI(pDecl->id)->pAddress = (void*) variableAddress;
3323                if (accept('=')) {
3324                    /* assignment */
3325                    expr();
3326                    pGen->storeR0(variableAddress);
3327                }
3328                if (tok == ',')
3329                    next();
3330            }
3331            skip(';');
3332            pBaseType = acceptPrimitiveType(mLocalArena);
3333        }
3334    }
3335
3336    bool checkSymbol() {
3337        return checkSymbol(tok);
3338    }
3339
3340    void decodeToken(String& buffer, tokenid_t token) {
3341        if (token == EOF ) {
3342            buffer.printf("EOF");
3343        } else if (token == TOK_NUM) {
3344            buffer.printf("numeric constant");
3345        } else if (token >= 0 && token < 256) {
3346            if (token < 32) {
3347                buffer.printf("'\\x%02x'", token);
3348            } else {
3349                buffer.printf("'%c'", token);
3350            }
3351        } else if (token >= TOK_KEYWORD && token < TOK_SYMBOL) {
3352            buffer.printf("keyword \"%s\"", nameof(token));
3353        } else {
3354            buffer.printf("symbol \"%s\"", nameof(token));
3355        }
3356    }
3357
3358    bool checkSymbol(tokenid_t token) {
3359        bool result = token >= TOK_SYMBOL;
3360        if (!result) {
3361            String temp;
3362            decodeToken(temp, token);
3363            error("Expected symbol. Got %s", temp.getUnwrapped());
3364        }
3365        return result;
3366    }
3367
3368    tokenid_t acceptSymbol() {
3369        tokenid_t result = 0;
3370        if (tok >= TOK_SYMBOL) {
3371            result = tok;
3372            next();
3373        }
3374        return result;
3375    }
3376
3377    void globalDeclarations() {
3378        while (tok != EOF) {
3379            Type* pBaseType = expectPrimitiveType(mGlobalArena);
3380            if (!pBaseType) {
3381                break;
3382            }
3383            Type* pDecl = expectDeclaration(pBaseType, mGlobalArena);
3384            if (!pDecl) {
3385                break;
3386            }
3387            if (! isDefined(pDecl->id)) {
3388                addGlobalSymbol(pDecl);
3389            }
3390            VariableInfo* name = VI(pDecl->id);
3391            if (name && name->pAddress) {
3392                error("Already defined global %s", nameof(pDecl->id));
3393            }
3394            if (pDecl->tag < TY_FUNC) {
3395                // it's a variable declaration
3396                for(;;) {
3397                    if (name && !name->pAddress) {
3398                        name->pAddress = (int*) allocGlobalSpace(4);
3399                    }
3400                    if (accept('=')) {
3401                        if (tok == TOK_NUM) {
3402                            if (name) {
3403                                * (int*) name->pAddress = tokc;
3404                            }
3405                            next();
3406                        } else {
3407                            error("Expected an integer constant");
3408                        }
3409                    }
3410                    if (!accept(',')) {
3411                        break;
3412                    }
3413                    pDecl = expectDeclaration(pBaseType, mGlobalArena);
3414                    if (!pDecl) {
3415                        break;
3416                    }
3417                    if (! isDefined(pDecl->id)) {
3418                        addGlobalSymbol(pDecl);
3419                    }
3420                    name = VI(pDecl->id);
3421                }
3422                skip(';');
3423            } else {
3424                // Function declaration
3425                if (accept(';')) {
3426                    // forward declaration.
3427                } else {
3428                    if (name) {
3429                        /* patch forward references (XXX: does not work for function
3430                         pointers) */
3431                        pGen->gsym((int) name->pForward);
3432                        /* put function address */
3433                        name->pAddress = (void*) codeBuf.getPC();
3434                    }
3435                    // Calculate stack offsets for parameters
3436                    mLocals.pushLevel();
3437                    intptr_t a = 8;
3438                    int argCount = 0;
3439                    for (Type* pP = pDecl->pTail; pP; pP = pP->pTail) {
3440                        Type* pArg = pP->pHead;
3441                        addLocalSymbol(pArg);
3442                        /* read param name and compute offset */
3443                        VI(pArg->id)->pAddress = (void*) a;
3444                        a = a + 4;
3445                        argCount++;
3446                    }
3447                    rsym = loc = 0;
3448                    pReturnType = pDecl->pHead;
3449                    a = pGen->functionEntry(argCount);
3450                    block(0, true);
3451                    pGen->gsym(rsym);
3452                    pGen->functionExit(argCount, a, loc);
3453                    mLocals.popLevel();
3454                }
3455            }
3456        }
3457    }
3458
3459    char* allocGlobalSpace(int bytes) {
3460        if (glo - pGlobalBase + bytes > ALLOC_SIZE) {
3461            error("Global space exhausted");
3462            return NULL;
3463        }
3464        char* result = glo;
3465        glo += bytes;
3466        return result;
3467    }
3468
3469    void cleanup() {
3470        if (pGlobalBase != 0) {
3471            free(pGlobalBase);
3472            pGlobalBase = 0;
3473        }
3474        if (pGen) {
3475            delete pGen;
3476            pGen = 0;
3477        }
3478        if (file) {
3479            delete file;
3480            file = 0;
3481        }
3482    }
3483
3484    void clear() {
3485        tok = 0;
3486        tokc = 0;
3487        tokl = 0;
3488        ch = 0;
3489        rsym = 0;
3490        loc = 0;
3491        glo = 0;
3492        dptr = 0;
3493        dch = 0;
3494        file = 0;
3495        pGlobalBase = 0;
3496        pGen = 0;
3497        mPragmaStringCount = 0;
3498    }
3499
3500    void setArchitecture(const char* architecture) {
3501        delete pGen;
3502        pGen = 0;
3503
3504        if (architecture != NULL) {
3505#ifdef PROVIDE_ARM_CODEGEN
3506            if (! pGen && strcmp(architecture, "arm") == 0) {
3507                pGen = new ARMCodeGenerator();
3508            }
3509#endif
3510#ifdef PROVIDE_X86_CODEGEN
3511            if (! pGen && strcmp(architecture, "x86") == 0) {
3512                pGen = new X86CodeGenerator();
3513            }
3514#endif
3515            if (!pGen ) {
3516                error("Unknown architecture %s\n", architecture);
3517            }
3518        }
3519
3520        if (pGen == NULL) {
3521#if defined(DEFAULT_ARM_CODEGEN)
3522            pGen = new ARMCodeGenerator();
3523#elif defined(DEFAULT_X86_CODEGEN)
3524            pGen = new X86CodeGenerator();
3525#endif
3526        }
3527        if (pGen == NULL) {
3528            error("No code generator defined.");
3529        } else {
3530            pGen->setErrorSink(this);
3531        }
3532    }
3533
3534public:
3535    struct args {
3536        args() {
3537            architecture = 0;
3538        }
3539        const char* architecture;
3540    };
3541
3542    Compiler() {
3543        clear();
3544    }
3545
3546    ~Compiler() {
3547        cleanup();
3548    }
3549
3550    int compile(const char* text, size_t textLength) {
3551        int result;
3552
3553        cleanup();
3554        clear();
3555        mTokenTable.setArena(&mGlobalArena);
3556        mGlobals.setArena(&mGlobalArena);
3557        mGlobals.setTokenTable(&mTokenTable);
3558        mLocals.setArena(&mLocalArena);
3559        mLocals.setTokenTable(&mTokenTable);
3560
3561        internKeywords();
3562        createPrimitiveTypes();
3563        codeBuf.init(ALLOC_SIZE);
3564        setArchitecture(NULL);
3565        if (!pGen) {
3566            return -1;
3567        }
3568#ifdef PROVIDE_TRACE_CODEGEN
3569            pGen = new TraceCodeGenerator(pGen);
3570#endif
3571            pGen->setErrorSink(this);
3572        pGen->init(&codeBuf);
3573        file = new TextInputStream(text, textLength);
3574        pGlobalBase = (char*) calloc(1, ALLOC_SIZE);
3575        glo = pGlobalBase;
3576        inp();
3577        next();
3578        globalDeclarations();
3579        checkForUndefinedForwardReferences();
3580        result = pGen->finishCompile();
3581        if (result == 0) {
3582            if (mErrorBuf.len()) {
3583                result = -2;
3584            }
3585        }
3586        return result;
3587    }
3588
3589    void createPrimitiveTypes() {
3590        mkpInt = createType(TY_INT, NULL, NULL, mGlobalArena);
3591        mkpChar = createType(TY_CHAR, NULL, NULL, mGlobalArena);
3592        mkpVoid = createType(TY_VOID, NULL, NULL, mGlobalArena);
3593        mkpFloat = createType(TY_FLOAT, NULL, NULL, mGlobalArena);
3594        mkpDouble = createType(TY_DOUBLE, NULL, NULL, mGlobalArena);
3595        mkpIntFn =  createType(TY_FUNC, mkpInt, NULL, mGlobalArena);
3596        mkpIntPtr = createPtrType(mkpInt, mGlobalArena);
3597        mkpCharPtr = createPtrType(mkpChar, mGlobalArena);
3598        mkpPtrIntFn = createPtrType(mkpIntFn, mGlobalArena);
3599    }
3600
3601    void checkForUndefinedForwardReferences() {
3602        mGlobals.forEach(static_ufrcFn, this);
3603    }
3604
3605    static bool static_ufrcFn(VariableInfo* value, void* context) {
3606        Compiler* pCompiler = (Compiler*) context;
3607        return pCompiler->undefinedForwardReferenceCheck(value);
3608    }
3609
3610    bool undefinedForwardReferenceCheck(VariableInfo* value) {
3611        if (!value->pAddress && value->pForward) {
3612            error("Undefined forward reference: %s",
3613                  mTokenTable[value->tok].pText);
3614        }
3615        return true;
3616    }
3617
3618    int dump(FILE* out) {
3619        fwrite(codeBuf.getBase(), 1, codeBuf.getSize(), out);
3620        return 0;
3621    }
3622
3623    int disassemble(FILE* out) {
3624        return pGen->disassemble(out);
3625    }
3626
3627    /* Look through the symbol table to find a symbol.
3628     * If found, return its value.
3629     */
3630    void* lookup(const char* name) {
3631        tokenid_t tok = mTokenTable.intern(name, strlen(name));
3632        VariableInfo* pVariableInfo = VI(tok);
3633        if (pVariableInfo) {
3634            return pVariableInfo->pAddress;
3635        }
3636        return NULL;
3637    }
3638
3639    void getPragmas(ACCsizei* actualStringCount,
3640                    ACCsizei maxStringCount, ACCchar** strings) {
3641        int stringCount = mPragmaStringCount;
3642        if (actualStringCount) {
3643            *actualStringCount = stringCount;
3644        }
3645        if (stringCount > maxStringCount) {
3646            stringCount = maxStringCount;
3647        }
3648        if (strings) {
3649            char* pPragmas = mPragmas.getUnwrapped();
3650            while (stringCount-- > 0) {
3651                *strings++ = pPragmas;
3652                pPragmas += strlen(pPragmas) + 1;
3653            }
3654        }
3655    }
3656
3657    char* getErrorMessage() {
3658        return mErrorBuf.getUnwrapped();
3659    }
3660
3661};
3662
3663const char* Compiler::operatorChars =
3664    "++--*@/@%@+@-@<<>><=>=<@>@==!=&&||&@^@|@~@!@";
3665
3666const char Compiler::operatorLevel[] =
3667    {11, 11, 1, 1, 1, 2, 2, 3, 3, 4, 4, 4, 4,
3668            5, 5, /* ==, != */
3669            9, 10, /* &&, || */
3670            6, 7, 8, /* & ^ | */
3671            2, 2 /* ~ ! */
3672            };
3673
3674#ifdef PROVIDE_ARM_CODEGEN
3675FILE* Compiler::ARMCodeGenerator::disasmOut;
3676#endif
3677
3678#ifdef PROVIDE_X86_CODEGEN
3679const int Compiler::X86CodeGenerator::operatorHelper[] = {
3680        0x1,     // ++
3681        0xff,    // --
3682        0xc1af0f, // *
3683        0xf9f79991, // /
3684        0xf9f79991, // % (With manual assist to swap results)
3685        0xc801, // +
3686        0xd8f7c829, // -
3687        0xe0d391, // <<
3688        0xf8d391, // >>
3689        0xe, // <=
3690        0xd, // >=
3691        0xc, // <
3692        0xf, // >
3693        0x4, // ==
3694        0x5, // !=
3695        0x0, // &&
3696        0x1, // ||
3697        0xc821, // &
3698        0xc831, // ^
3699        0xc809, // |
3700        0xd0f7, // ~
3701        0x4     // !
3702};
3703#endif
3704
3705struct ACCscript {
3706    ACCscript() {
3707        text = 0;
3708        textLength = 0;
3709        accError = ACC_NO_ERROR;
3710    }
3711
3712    ~ACCscript() {
3713        delete text;
3714    }
3715
3716    void setError(ACCenum error) {
3717        if (accError == ACC_NO_ERROR && error != ACC_NO_ERROR) {
3718            accError = error;
3719        }
3720    }
3721
3722    ACCenum getError() {
3723        ACCenum result = accError;
3724        accError = ACC_NO_ERROR;
3725        return result;
3726    }
3727
3728    Compiler compiler;
3729    char* text;
3730    int textLength;
3731    ACCenum accError;
3732};
3733
3734
3735extern "C"
3736ACCscript* accCreateScript() {
3737    return new ACCscript();
3738}
3739
3740extern "C"
3741ACCenum accGetError( ACCscript* script ) {
3742    return script->getError();
3743}
3744
3745extern "C"
3746void accDeleteScript(ACCscript* script) {
3747    delete script;
3748}
3749
3750extern "C"
3751void accScriptSource(ACCscript* script,
3752    ACCsizei count,
3753    const ACCchar ** string,
3754    const ACCint * length) {
3755    int totalLength = 0;
3756    for(int i = 0; i < count; i++) {
3757        int len = -1;
3758        const ACCchar* s = string[i];
3759        if (length) {
3760            len = length[i];
3761        }
3762        if (len < 0) {
3763            len = strlen(s);
3764        }
3765        totalLength += len;
3766    }
3767    delete script->text;
3768    char* text = new char[totalLength + 1];
3769    script->text = text;
3770    script->textLength = totalLength;
3771    char* dest = text;
3772    for(int i = 0; i < count; i++) {
3773        int len = -1;
3774        const ACCchar* s = string[i];
3775        if (length) {
3776            len = length[i];
3777        }
3778        if (len < 0) {
3779            len = strlen(s);
3780        }
3781        memcpy(dest, s, len);
3782        dest += len;
3783    }
3784    text[totalLength] = '\0';
3785}
3786
3787extern "C"
3788void accCompileScript(ACCscript* script) {
3789    int result = script->compiler.compile(script->text, script->textLength);
3790    if (result) {
3791        script->setError(ACC_INVALID_OPERATION);
3792    }
3793}
3794
3795extern "C"
3796void accGetScriptiv(ACCscript* script,
3797    ACCenum pname,
3798    ACCint * params) {
3799    switch (pname) {
3800        case ACC_INFO_LOG_LENGTH:
3801            *params = 0;
3802            break;
3803    }
3804}
3805
3806extern "C"
3807void accGetScriptInfoLog(ACCscript* script,
3808    ACCsizei maxLength,
3809    ACCsizei * length,
3810    ACCchar * infoLog) {
3811    char* message = script->compiler.getErrorMessage();
3812    int messageLength = strlen(message) + 1;
3813    if (length) {
3814        *length = messageLength;
3815    }
3816    if (infoLog && maxLength > 0) {
3817        int trimmedLength = maxLength < messageLength ?
3818                maxLength : messageLength;
3819        memcpy(infoLog, message, trimmedLength);
3820        infoLog[trimmedLength] = 0;
3821    }
3822}
3823
3824extern "C"
3825void accGetScriptLabel(ACCscript* script, const ACCchar * name,
3826                       ACCvoid ** address) {
3827    void* value = script->compiler.lookup(name);
3828    if (value) {
3829        *address = value;
3830    } else {
3831        script->setError(ACC_INVALID_VALUE);
3832    }
3833}
3834
3835extern "C"
3836void accGetPragmas(ACCscript* script, ACCsizei* actualStringCount,
3837                   ACCsizei maxStringCount, ACCchar** strings){
3838    script->compiler.getPragmas(actualStringCount, maxStringCount, strings);
3839}
3840
3841extern "C"
3842void accDisassemble(ACCscript* script) {
3843    script->compiler.disassemble(stderr);
3844}
3845
3846
3847} // namespace acc
3848
3849