acc.cpp revision bab80642035c2fe8565ccbadf18883b7d1df6437
1/*
2 * Android "Almost" C Compiler.
3 * This is a compiler for a small subset of the C language, intended for use
4 * in scripting environments where speed and memory footprint are important.
5 *
6 * This code is based upon the "unobfuscated" version of the
7 * Obfuscated Tiny C compiler, see the file LICENSE for details.
8 *
9 */
10
11#include <ctype.h>
12#include <dlfcn.h>
13#include <errno.h>
14#include <stdarg.h>
15#include <stdint.h>
16#include <stdio.h>
17#include <stdlib.h>
18#include <string.h>
19#include <cutils/hashmap.h>
20
21#if defined(__i386__)
22#include <sys/mman.h>
23#endif
24
25#if defined(__arm__)
26#include <unistd.h>
27#endif
28
29#if defined(__arm__)
30#define DEFAULT_ARM_CODEGEN
31#define PROVIDE_ARM_CODEGEN
32#elif defined(__i386__)
33#define DEFAULT_X86_CODEGEN
34#define PROVIDE_X86_CODEGEN
35#elif defined(__x86_64__)
36#define DEFAULT_X64_CODEGEN
37#define PROVIDE_X64_CODEGEN
38#endif
39
40#ifdef PROVIDE_ARM_CODEGEN
41#include "disassem.h"
42#endif
43
44#include <acc/acc.h>
45
46#define LOG_API(...) do {} while(0)
47// #define LOG_API(...) fprintf (stderr, __VA_ARGS__)
48
49#define LOG_STACK(...) do {} while(0)
50// #define LOG_STACK(...) fprintf (stderr, __VA_ARGS__)
51
52// #define ENABLE_ARM_DISASSEMBLY
53// #define PROVIDE_TRACE_CODEGEN
54
55namespace acc {
56
57// Subset of STL vector.
58template<class E> class Vector {
59    public:
60    Vector() {
61        mpBase = 0;
62        mUsed = 0;
63        mSize = 0;
64    }
65
66    ~Vector() {
67        if (mpBase) {
68            for(size_t i = 0; i < mUsed; i++)  {
69                mpBase[mUsed].~E();
70            }
71            free(mpBase);
72        }
73    }
74
75    inline E& operator[](size_t i) {
76        return mpBase[i];
77    }
78
79    inline E& front() {
80        return mpBase[0];
81    }
82
83    inline E& back() {
84        return mpBase[mUsed - 1];
85    }
86
87    void pop_back() {
88        mUsed -= 1;
89        mpBase[mUsed].~E();
90    }
91
92    void push_back(const E& item) {
93        * ensure(1) = item;
94    }
95
96    size_t size() {
97        return mUsed;
98    }
99
100private:
101    E* ensure(int n) {
102        size_t newUsed = mUsed + n;
103        if (newUsed > mSize) {
104            size_t newSize = mSize * 2 + 10;
105            if (newSize < newUsed) {
106                newSize = newUsed;
107            }
108            mpBase = (E*) realloc(mpBase, sizeof(E) * newSize);
109            mSize = newSize;
110        }
111        E* result = mpBase + mUsed;
112        mUsed = newUsed;
113        return result;
114    }
115
116    E* mpBase;
117    size_t mUsed;
118    size_t mSize;
119};
120
121class ErrorSink {
122public:
123    void error(const char *fmt, ...) {
124        va_list ap;
125        va_start(ap, fmt);
126        verror(fmt, ap);
127        va_end(ap);
128    }
129
130    virtual ~ErrorSink() {}
131    virtual void verror(const char* fmt, va_list ap) = 0;
132};
133
134class Compiler : public ErrorSink {
135    typedef int tokenid_t;
136    enum TypeTag {
137        TY_INT, TY_CHAR, TY_VOID, TY_FLOAT, TY_DOUBLE,
138        TY_POINTER, TY_FUNC, TY_PARAM
139    };
140
141    struct Type {
142        TypeTag tag;
143        tokenid_t id; // For function arguments
144        Type* pHead;
145        Type* pTail;
146    };
147
148    class CodeBuf {
149        char* ind; // Output code pointer
150        char* pProgramBase;
151        ErrorSink* mErrorSink;
152        int mSize;
153        bool mOverflowed;
154
155        void release() {
156            if (pProgramBase != 0) {
157                free(pProgramBase);
158                pProgramBase = 0;
159            }
160        }
161
162        bool check(int n) {
163            int newSize = ind - pProgramBase + n;
164            bool overflow = newSize > mSize;
165            if (overflow && !mOverflowed) {
166                mOverflowed = true;
167                if (mErrorSink) {
168                    mErrorSink->error("Code too large: %d bytes", newSize);
169                }
170            }
171            return overflow;
172        }
173
174    public:
175        CodeBuf() {
176            pProgramBase = 0;
177            ind = 0;
178            mErrorSink = 0;
179            mSize = 0;
180            mOverflowed = false;
181        }
182
183        ~CodeBuf() {
184            release();
185        }
186
187        void init(int size) {
188            release();
189            mSize = size;
190            pProgramBase = (char*) calloc(1, size);
191            ind = pProgramBase;
192        }
193
194        void setErrorSink(ErrorSink* pErrorSink) {
195            mErrorSink = pErrorSink;
196        }
197
198        int o4(int n) {
199            if(check(4)) {
200                return 0;
201            }
202            intptr_t result = (intptr_t) ind;
203            * (int*) ind = n;
204            ind += 4;
205            return result;
206        }
207
208        /*
209         * Output a byte. Handles all values, 0..ff.
210         */
211        void ob(int n) {
212            if(check(1)) {
213                return;
214            }
215            *ind++ = n;
216        }
217
218        inline void* getBase() {
219            return (void*) pProgramBase;
220        }
221
222        intptr_t getSize() {
223            return ind - pProgramBase;
224        }
225
226        intptr_t getPC() {
227            return (intptr_t) ind;
228        }
229    };
230
231    /**
232     * A code generator creates an in-memory program, generating the code on
233     * the fly. There is one code generator implementation for each supported
234     * architecture.
235     *
236     * The code generator implements the following abstract machine:
237     * R0 - the accumulator.
238     * FP - a frame pointer for accessing function arguments and local
239     *      variables.
240     * SP - a stack pointer for storing intermediate results while evaluating
241     *      expressions. The stack pointer grows downwards.
242     *
243     * The function calling convention is that all arguments are placed on the
244     * stack such that the first argument has the lowest address.
245     * After the call, the result is in R0. The caller is responsible for
246     * removing the arguments from the stack.
247     * The R0 register is not saved across function calls. The
248     * FP and SP registers are saved.
249     */
250
251    class CodeGenerator {
252    public:
253        CodeGenerator() {
254            mErrorSink = 0;
255            pCodeBuf = 0;
256            pushType();
257        }
258        virtual ~CodeGenerator() {}
259
260        virtual void init(CodeBuf* pCodeBuf) {
261            this->pCodeBuf = pCodeBuf;
262            pCodeBuf->setErrorSink(mErrorSink);
263        }
264
265        virtual void setErrorSink(ErrorSink* pErrorSink) {
266            mErrorSink = pErrorSink;
267            if (pCodeBuf) {
268                pCodeBuf->setErrorSink(mErrorSink);
269            }
270        }
271
272        /* Emit a function prolog.
273         * argCount is the number of arguments.
274         * Save the old value of the FP.
275         * Set the new value of the FP.
276         * Convert from the native platform calling convention to
277         * our stack-based calling convention. This may require
278         * pushing arguments from registers to the stack.
279         * Allocate "N" bytes of stack space. N isn't known yet, so
280         * just emit the instructions for adjusting the stack, and return
281         * the address to patch up. The patching will be done in
282         * functionExit().
283         * returns address to patch with local variable size.
284        */
285        virtual int functionEntry(int argCount) = 0;
286
287        /* Emit a function epilog.
288         * Restore the old SP and FP register values.
289         * Return to the calling function.
290         * argCount - the number of arguments to the function.
291         * localVariableAddress - returned from functionEntry()
292         * localVariableSize - the size in bytes of the local variables.
293         */
294        virtual void functionExit(int argCount, int localVariableAddress,
295                                  int localVariableSize) = 0;
296
297        /* load immediate value to R0 */
298        virtual void li(int i, Type* pType) = 0;
299
300        /* Load floating point value from global address. */
301        virtual void loadFloat(int address, Type* pType) = 0;
302
303        /* Jump to a target, and return the address of the word that
304         * holds the target data, in case it needs to be fixed up later.
305         */
306        virtual int gjmp(int t) = 0;
307
308        /* Test R0 and jump to a target if the test succeeds.
309         * l = 0: je, l == 1: jne
310         * Return the address of the word that holds the targed data, in
311         * case it needs to be fixed up later.
312         */
313        virtual int gtst(bool l, int t) = 0;
314
315        /* Compare TOS against R0, and store the boolean result in R0.
316         * Pops TOS.
317         * op specifies the comparison.
318         */
319        virtual void gcmp(int op, Type* pResultType) = 0;
320
321        /* Perform the arithmetic op specified by op. TOS is the
322         * left argument, R0 is the right argument.
323         * Pops TOS.
324         */
325        virtual void genOp(int op) = 0;
326
327        /* Compare 0 against R0, and store the boolean result in R0.
328         * op specifies the comparison.
329         */
330        virtual void gUnaryCmp(int op, Type* pResultType) = 0;
331
332        /* Perform the arithmetic op specified by op. 0 is the
333         * left argument, R0 is the right argument.
334         */
335        virtual void genUnaryOp(int op) = 0;
336
337        /* Push R0 onto the stack.
338         */
339        virtual void pushR0() = 0;
340
341        /* Store R0 to the address stored in TOS.
342         * The TOS is popped.
343         * pPointerType is the type of the pointer (of the input R0).
344         */
345        virtual void storeR0ToTOS(Type* pPointerType) = 0;
346
347        /* Load R0 from the address stored in R0.
348         * pPointerType is the type of the pointer (of the input R0).
349         */
350        virtual void loadR0FromR0(Type* pPointerType) = 0;
351
352        /* Load the absolute address of a variable to R0.
353         * If ea <= LOCAL, then this is a local variable, or an
354         * argument, addressed relative to FP.
355         * else it is an absolute global address.
356         */
357        virtual void leaR0(int ea, Type* pPointerType) = 0;
358
359        /* Store R0 to a variable.
360         * If ea <= LOCAL, then this is a local variable, or an
361         * argument, addressed relative to FP.
362         * else it is an absolute global address.
363         */
364        virtual void storeR0(int ea, Type* pType) = 0;
365
366        /* load R0 from a variable.
367         * If ea <= LOCAL, then this is a local variable, or an
368         * argument, addressed relative to FP.
369         * else it is an absolute global address.
370         * If isIncDec is true, then the stored variable's value
371         * should be post-incremented or post-decremented, based
372         * on the value of op.
373         */
374        virtual void loadR0(int ea, bool isIncDec, int op, Type* pType) = 0;
375
376        /**
377         * Convert R0 to the given type.
378         */
379        virtual void convertR0(Type* pType) = 0;
380
381        /* Emit code to adjust the stack for a function call. Return the
382         * label for the address of the instruction that adjusts the
383         * stack size. This will be passed as argument "a" to
384         * endFunctionCallArguments.
385         */
386        virtual int beginFunctionCallArguments() = 0;
387
388        /* Emit code to store R0 to the stack at byte offset l.
389         * Returns stack size of object (typically 4 or 8 bytes)
390         */
391        virtual size_t storeR0ToArg(int l) = 0;
392
393        /* Patch the function call preamble.
394         * a is the address returned from beginFunctionCallArguments
395         * l is the number of bytes the arguments took on the stack.
396         * Typically you would also emit code to convert the argument
397         * list into whatever the native function calling convention is.
398         * On ARM for example you would pop the first 5 arguments into
399         * R0..R4
400         */
401        virtual void endFunctionCallArguments(int a, int l) = 0;
402
403        /* Emit a call to an unknown function. The argument "symbol" needs to
404         * be stored in the location where the address should go. It forms
405         * a chain. The address will be patched later.
406         * Return the address of the word that has to be patched.
407         */
408        virtual int callForward(int symbol, Type* pFunc) = 0;
409
410        /* Call a function using PC-relative addressing. t is the PC-relative
411         * address of the function. It has already been adjusted for the
412         * architectural jump offset, so just store it as-is.
413         */
414        virtual void callRelative(int t, Type* pFunc) = 0;
415
416        /* Call a function pointer. L is the number of bytes the arguments
417         * take on the stack. The address of the function is stored at
418         * location SP + l.
419         */
420        virtual void callIndirect(int l, Type* pFunc) = 0;
421
422        /* Adjust SP after returning from a function call. l is the
423         * number of bytes of arguments stored on the stack. isIndirect
424         * is true if this was an indirect call. (In which case the
425         * address of the function is stored at location SP + l.)
426         */
427        virtual void adjustStackAfterCall(int l, bool isIndirect) = 0;
428
429        /* Print a disassembly of the assembled code to out. Return
430         * non-zero if there is an error.
431         */
432        virtual int disassemble(FILE* out) = 0;
433
434        /* Generate a symbol at the current PC. t is the head of a
435         * linked list of addresses to patch.
436         */
437        virtual void gsym(int t) = 0;
438
439        /*
440         * Do any cleanup work required at the end of a compile.
441         * For example, an instruction cache might need to be
442         * invalidated.
443         * Return non-zero if there is an error.
444         */
445        virtual int finishCompile() = 0;
446
447        /**
448         * Adjust relative branches by this amount.
449         */
450        virtual int jumpOffset() = 0;
451
452        /**
453         * Memory alignment (in bytes) for this type of data
454         */
455        virtual size_t alignment(Type* type) = 0;
456
457        /**
458         * Array element alignment (in bytes) for this type of data.
459         */
460        virtual size_t sizeOf(Type* type) = 0;
461
462        /**
463         * Stack argument size of this data type.
464         */
465        virtual size_t stackSizeOf(Type* pType) = 0;
466
467        virtual Type* getR0Type() {
468            return mExpressionStack.back();
469        }
470
471    protected:
472        /*
473         * Output a byte. Handles all values, 0..ff.
474         */
475        void ob(int n) {
476            pCodeBuf->ob(n);
477        }
478
479        intptr_t o4(int data) {
480            return pCodeBuf->o4(data);
481        }
482
483        intptr_t getBase() {
484            return (intptr_t) pCodeBuf->getBase();
485        }
486
487        intptr_t getPC() {
488            return pCodeBuf->getPC();
489        }
490
491        intptr_t getSize() {
492            return pCodeBuf->getSize();
493        }
494
495        void error(const char* fmt,...) {
496            va_list ap;
497            va_start(ap, fmt);
498            mErrorSink->verror(fmt, ap);
499            va_end(ap);
500        }
501
502        void assert(bool test) {
503            if (!test) {
504                * (char*) 0 = 0;
505                error("code generator assertion failed.");
506            }
507        }
508
509        void setR0Type(Type* pType) {
510            mExpressionStack.back() = pType;
511        }
512
513        Type* getTOSType() {
514            return mExpressionStack[mExpressionStack.size()-2];
515        }
516
517        void pushType() {
518            mExpressionStack.push_back(NULL);
519        }
520
521        void popType() {
522            mExpressionStack.pop_back();
523        }
524
525        bool bitsSame(Type* pA, Type* pB) {
526            return collapseType(pA->tag) == collapseType(pB->tag);
527        }
528
529        TypeTag collapseType(TypeTag tag) {
530            static const TypeTag collapsedTag[] = {
531                    TY_INT, TY_INT, TY_VOID, TY_FLOAT, TY_DOUBLE, TY_INT,
532                    TY_VOID, TY_VOID};
533            return collapsedTag[tag];
534        }
535
536        TypeTag collapseTypeR0() {
537            return collapseType(getR0Type()->tag);
538        }
539
540        bool isFloatType(Type* pType) {
541            return isFloatTag(pType->tag);
542        }
543
544        bool isFloatTag(TypeTag tag) {
545            return tag == TY_FLOAT || tag == TY_DOUBLE;
546        }
547
548    private:
549        Vector<Type*> mExpressionStack;
550        CodeBuf* pCodeBuf;
551        ErrorSink* mErrorSink;
552    };
553
554#ifdef PROVIDE_ARM_CODEGEN
555
556    class ARMCodeGenerator : public CodeGenerator {
557    public:
558        ARMCodeGenerator() {}
559
560        virtual ~ARMCodeGenerator() {}
561
562        /* returns address to patch with local variable size
563        */
564        virtual int functionEntry(int argCount) {
565            LOG_API("functionEntry(%d);\n", argCount);
566            mStackUse = 0;
567            // sp -> arg4 arg5 ...
568            // Push our register-based arguments back on the stack
569            if (argCount > 0) {
570                int regArgCount = argCount <= 4 ? argCount : 4;
571                o4(0xE92D0000 | ((1 << argCount) - 1)); // stmfd    sp!, {}
572                mStackUse += regArgCount * 4;
573            }
574            // sp -> arg0 arg1 ...
575            o4(0xE92D4800); // stmfd sp!, {fp, lr}
576            mStackUse += 2 * 4;
577            // sp, fp -> oldfp, retadr, arg0 arg1 ....
578            o4(0xE1A0B00D); // mov    fp, sp
579            LOG_STACK("functionEntry: %d\n", mStackUse);
580            return o4(0xE24DD000); // sub    sp, sp, # <local variables>
581            // We don't know how many local variables we are going to use,
582            // but we will round the allocation up to a multiple of
583            // STACK_ALIGNMENT, so it won't affect the stack alignment.
584        }
585
586        virtual void functionExit(int argCount, int localVariableAddress, int localVariableSize) {
587            LOG_API("functionExit(%d, %d, %d);\n", argCount, localVariableAddress, localVariableSize);
588            // Round local variable size up to a multiple of stack alignment
589            localVariableSize = ((localVariableSize + STACK_ALIGNMENT - 1) /
590                STACK_ALIGNMENT) * STACK_ALIGNMENT;
591            // Patch local variable allocation code:
592            if (localVariableSize < 0 || localVariableSize > 255) {
593                error("localVariables out of range: %d", localVariableSize);
594            }
595            *(char*) (localVariableAddress) = localVariableSize;
596
597            // sp -> locals .... fp -> oldfp, retadr, arg0, arg1, ...
598            o4(0xE1A0E00B); // mov lr, fp
599            o4(0xE59BB000); // ldr fp, [fp]
600            o4(0xE28ED004); // add sp, lr, #4
601            // sp -> retadr, arg0, ...
602            o4(0xE8BD4000); // ldmfd    sp!, {lr}
603            // sp -> arg0 ....
604            if (argCount > 0) {
605                // We store the PC into the lr so we can adjust the sp before
606                // returning. We need to pull off the registers we pushed
607                // earlier. We don't need to actually store them anywhere,
608                // just adjust the stack.
609                int regArgCount = argCount <= 4 ? argCount : 4;
610                o4(0xE28DD000 | (regArgCount << 2)); // add sp, sp, #argCount << 2
611            }
612            o4(0xE12FFF1E); // bx lr
613        }
614
615        /* load immediate value */
616        virtual void li(int t, Type* pType) {
617            LOG_API("li(%d);\n", t);
618            if (t >= 0 && t < 255) {
619                 o4(0xE3A00000 + t); // mov    r0, #0
620            } else if (t >= -256 && t < 0) {
621                // mvn means move constant ^ ~0
622                o4(0xE3E00001 - t); // mvn    r0, #0
623            } else {
624                  o4(0xE51F0000); //         ldr    r0, .L3
625                  o4(0xEA000000); //         b .L99
626                  o4(t);          // .L3:   .word 0
627                                  // .L99:
628            }
629            setR0Type(pType);
630        }
631
632        virtual void loadFloat(int address, Type* pType) {
633            error("Unimplemented.\n");
634            setR0Type(pType);
635        }
636
637        virtual int gjmp(int t) {
638            LOG_API("gjmp(%d);\n", t);
639            return o4(0xEA000000 | encodeAddress(t)); // b .L33
640        }
641
642        /* l = 0: je, l == 1: jne */
643        virtual int gtst(bool l, int t) {
644            LOG_API("gtst(%d, %d);\n", l, t);
645            o4(0xE3500000); // cmp r0,#0
646            int branch = l ? 0x1A000000 : 0x0A000000; // bne : beq
647            return o4(branch | encodeAddress(t));
648        }
649
650        virtual void gcmp(int op, Type* pResultType) {
651            LOG_API("gcmp(%d);\n", op);
652            o4(0xE8BD0002);  // ldmfd   sp!,{r1}
653            mStackUse -= 4;
654            o4(0xE1510000); // cmp r1, r1
655            switch(op) {
656            case OP_EQUALS:
657                o4(0x03A00001); // moveq r0,#1
658                o4(0x13A00000); // movne r0,#0
659                break;
660            case OP_NOT_EQUALS:
661                o4(0x03A00000); // moveq r0,#0
662                o4(0x13A00001); // movne r0,#1
663                break;
664            case OP_LESS_EQUAL:
665                o4(0xD3A00001); // movle r0,#1
666                o4(0xC3A00000); // movgt r0,#0
667                break;
668            case OP_GREATER:
669                o4(0xD3A00000); // movle r0,#0
670                o4(0xC3A00001); // movgt r0,#1
671                break;
672            case OP_GREATER_EQUAL:
673                o4(0xA3A00001); // movge r0,#1
674                o4(0xB3A00000); // movlt r0,#0
675                break;
676            case OP_LESS:
677                o4(0xA3A00000); // movge r0,#0
678                o4(0xB3A00001); // movlt r0,#1
679                break;
680            default:
681                error("Unknown comparison op %d", op);
682                break;
683            }
684            popType();
685        }
686
687        virtual void genOp(int op) {
688            LOG_API("genOp(%d);\n", op);
689            o4(0xE8BD0002);  // ldmfd   sp!,{r1}
690            mStackUse -= 4;
691            switch(op) {
692            case OP_MUL:
693                o4(0x0E0000091); // mul     r0,r1,r0
694                break;
695            case OP_DIV:
696                callRuntime(runtime_DIV);
697                break;
698            case OP_MOD:
699                callRuntime(runtime_MOD);
700                break;
701            case OP_PLUS:
702                o4(0xE0810000);  // add     r0,r1,r0
703                break;
704            case OP_MINUS:
705                o4(0xE0410000);  // sub     r0,r1,r0
706                break;
707            case OP_SHIFT_LEFT:
708                o4(0xE1A00011);  // lsl     r0,r1,r0
709                break;
710            case OP_SHIFT_RIGHT:
711                o4(0xE1A00051);  // asr     r0,r1,r0
712                break;
713            case OP_BIT_AND:
714                o4(0xE0010000);  // and     r0,r1,r0
715                break;
716            case OP_BIT_XOR:
717                o4(0xE0210000);  // eor     r0,r1,r0
718                break;
719            case OP_BIT_OR:
720                o4(0xE1810000);  // orr     r0,r1,r0
721                break;
722            case OP_BIT_NOT:
723                o4(0xE1E00000);  // mvn     r0, r0
724                break;
725            default:
726                error("Unimplemented op %d\n", op);
727                break;
728            }
729            popType();
730        }
731
732        virtual void gUnaryCmp(int op, Type* pResultType) {
733            LOG_API("gUnaryCmp(%d);\n", op);
734            o4(0xE3A01000);  // mov    r1, #0
735            o4(0xE1510000); // cmp r1, r1
736            switch(op) {
737            case OP_LOGICAL_NOT:
738                o4(0x03A00000); // moveq r0,#0
739                o4(0x13A00001); // movne r0,#1
740                break;
741             default:
742                error("Unknown unary comparison op %d", op);
743                break;
744            }
745            setR0Type(pResultType);
746        }
747
748        virtual void genUnaryOp(int op) {
749            LOG_API("genOp(%d);\n", op);
750            switch(op) {
751            case OP_MINUS:
752                o4(0xE3A01000);  // mov    r1, #0
753                o4(0xE0410000);  // sub     r0,r1,r0
754                break;
755            case OP_BIT_NOT:
756                o4(0xE1E00000);  // mvn     r0, r0
757                break;
758            default:
759                error("Unknown unary op %d\n", op);
760                break;
761            }
762        }
763
764        virtual void pushR0() {
765            LOG_API("pushR0();\n");
766            o4(0xE92D0001);  // stmfd   sp!,{r0}
767            mStackUse += 4;
768            pushType();
769            LOG_STACK("pushR0: %d\n", mStackUse);
770        }
771
772        virtual void storeR0ToTOS(Type* pPointerType) {
773            LOG_API("storeR0ToTOS(%d);\n", isInt);
774            assert(pPointerType->tag == TY_POINTER);
775            o4(0xE8BD0002);  // ldmfd   sp!,{r1}
776            mStackUse -= 4;
777            switch (pPointerType->pHead->tag) {
778                case TY_INT:
779                    o4(0xE5810000); // str r0, [r1]
780                    break;
781                case TY_CHAR:
782                    o4(0xE5C10000); // strb r0, [r1]
783                    break;
784                default:
785                    error("storeR0ToTOS: unimplemented type");
786                    break;
787            }
788            popType();
789        }
790
791        virtual void loadR0FromR0(Type* pPointerType) {
792            LOG_API("loadR0FromR0(%d);\n", pPointerType);
793            assert(pPointerType->tag == TY_POINTER);
794            switch (pPointerType->pHead->tag) {
795                case TY_INT:
796                    o4(0xE5900000); // ldr r0, [r0]
797                    break;
798                case TY_CHAR:
799                    o4(0xE5D00000); // ldrb r0, [r0]
800                    break;
801                default:
802                    error("loadR0FromR0: unimplemented type");
803                    break;
804            }
805            setR0Type(pPointerType->pHead);
806        }
807
808        virtual void leaR0(int ea, Type* pPointerType) {
809            LOG_API("leaR0(%d);\n", ea);
810            if (ea < LOCAL) {
811                // Local, fp relative
812                if (ea < -1023 || ea > 1023 || ((ea & 3) != 0)) {
813                    error("Offset out of range: %08x", ea);
814                }
815                if (ea < 0) {
816                    o4(0xE24B0F00 | (0xff & ((-ea) >> 2))); // sub    r0, fp, #ea
817                } else {
818                    o4(0xE28B0F00 | (0xff & (ea >> 2))); // add    r0, fp, #ea
819                }
820            } else {
821                // Global, absolute.
822                o4(0xE59F0000); //        ldr    r0, .L1
823                o4(0xEA000000); //        b .L99
824                o4(ea);         // .L1:   .word 0
825                                // .L99:
826            }
827            setR0Type(pPointerType);
828        }
829
830        virtual void storeR0(int ea, Type* pType) {
831            LOG_API("storeR0(%d);\n", ea);
832            if (ea < LOCAL) {
833                // Local, fp relative
834                if (ea < -4095 || ea > 4095) {
835                    error("Offset out of range: %08x", ea);
836                }
837                if (ea < 0) {
838                    o4(0xE50B0000 | (0xfff & (-ea))); // str r0, [fp,#-ea]
839                } else {
840                    o4(0xE58B0000 | (0xfff & ea)); // str r0, [fp,#ea]
841                }
842            } else{
843                // Global, absolute
844                o4(0xE59F1000); //         ldr r1, .L1
845                o4(0xEA000000); //         b .L99
846                o4(ea);         // .L1:    .word 0
847                o4(0xE5810000); // .L99:   str r0, [r1]
848            }
849        }
850
851        virtual void loadR0(int ea, bool isIncDec, int op, Type* pType) {
852            LOG_API("loadR0(%d, %d, %d, %d);\n", ea, isIncDec, op, pType);
853            if (ea < LOCAL) {
854                // Local, fp relative
855                if (ea < -4095 || ea > 4095) {
856                    error("Offset out of range: %08x", ea);
857                }
858                if (ea < 0) {
859                    o4(0xE51B0000 | (0xfff & (-ea))); // ldr r0, [fp,#-ea]
860                } else {
861                    o4(0xE59B0000 | (0xfff & ea));    // ldr r0, [fp,#ea]
862                }
863            } else {
864                // Global, absolute
865                o4(0xE59F2000); //        ldr r2, .L1
866                o4(0xEA000000); //        b .L99
867                o4(ea);         // .L1:   .word ea
868                o4(0xE5920000); // .L99:  ldr r0, [r2]
869            }
870
871            if (isIncDec) {
872                switch (op) {
873                case OP_INCREMENT:
874                    o4(0xE2801001); // add r1, r0, #1
875                    break;
876                case OP_DECREMENT:
877                    o4(0xE2401001); // sub r1, r0, #1
878                    break;
879                default:
880                    error("unknown opcode: %d", op);
881                }
882                if (ea < LOCAL) {
883                    // Local, fp relative
884                    // Don't need range check, was already checked above
885                    if (ea < 0) {
886                        o4(0xE50B1000 | (0xfff & (-ea))); // str r1, [fp,#-ea]
887                    } else {
888                        o4(0xE58B1000 | (0xfff & ea));    // str r1, [fp,#ea]
889                    }
890                } else{
891                    // Global, absolute
892                    // r2 is already set up from before.
893                    o4(0xE5821000); // str r1, [r2]
894               }
895            }
896            setR0Type(pType);
897        }
898
899        virtual void convertR0(Type* pType){
900            Type* pR0Type = getR0Type();
901            if (bitsSame(pType, pR0Type)) {
902                // do nothing special
903            } else if (isFloatType(pType) && isFloatType(pR0Type)) {
904                // do nothing special, both held in same register on x87.
905            } else {
906                error("Incompatible types old: %d new: %d",
907                      pR0Type->tag, pType->tag);
908            }
909            setR0Type(pType);
910        }
911
912        virtual int beginFunctionCallArguments() {
913            LOG_API("beginFunctionCallArguments();\n");
914            return o4(0xE24DDF00); // Placeholder
915        }
916
917        virtual size_t storeR0ToArg(int l) {
918            LOG_API("storeR0ToArg(%d);\n", l);
919            if (l < 0 || l > 4096-4) {
920                error("l out of range for stack offset: 0x%08x", l);
921            }
922            o4(0xE58D0000 + l); // str r0, [sp, #4]
923            return 4;
924        }
925
926        virtual void endFunctionCallArguments(int a, int l) {
927            LOG_API("endFunctionCallArguments(0x%08x, %d);\n", a, l);
928            int argCount = l >> 2;
929            int argumentStackUse = l;
930            if (argCount > 0) {
931                int regArgCount = argCount > 4 ? 4 : argCount;
932                argumentStackUse -= regArgCount * 4;
933                o4(0xE8BD0000 | ((1 << regArgCount) - 1)); // ldmfd   sp!,{}
934            }
935            mStackUse += argumentStackUse;
936
937            // Align stack.
938            int missalignment = mStackUse - ((mStackUse / STACK_ALIGNMENT)
939                    * STACK_ALIGNMENT);
940            mStackAlignmentAdjustment = 0;
941            if (missalignment > 0) {
942                mStackAlignmentAdjustment = STACK_ALIGNMENT - missalignment;
943            }
944            l += mStackAlignmentAdjustment;
945
946            if (l < 0 || l > 0x3FC) {
947                error("L out of range for stack adjustment: 0x%08x", l);
948            }
949            * (int*) a = 0xE24DDF00 | (l >> 2); // sub    sp, sp, #0 << 2
950            mStackUse += mStackAlignmentAdjustment;
951            LOG_STACK("endFunctionCallArguments mStackUse: %d, mStackAlignmentAdjustment %d\n",
952                      mStackUse, mStackAlignmentAdjustment);
953        }
954
955        virtual int callForward(int symbol, Type* pFunc) {
956            LOG_API("callForward(%d);\n", symbol);
957            setR0Type(pFunc->pHead);
958            // Forward calls are always short (local)
959            return o4(0xEB000000 | encodeAddress(symbol));
960        }
961
962        virtual void callRelative(int t, Type* pFunc) {
963            LOG_API("callRelative(%d);\n", t);
964            setR0Type(pFunc->pHead);
965            int abs = t + getPC() + jumpOffset();
966            LOG_API("abs=%d (0x%08x)\n", abs, abs);
967            if (t >= - (1 << 25) && t < (1 << 25)) {
968                o4(0xEB000000 | encodeAddress(t));
969            } else {
970                // Long call.
971                o4(0xE59FC000); //         ldr    r12, .L1
972                o4(0xEA000000); //         b .L99
973                o4(t - 12);     // .L1:    .word 0
974                o4(0xE08CC00F); // .L99:   add r12,pc
975                o4(0xE12FFF3C); //         blx r12
976           }
977        }
978
979        virtual void callIndirect(int l, Type* pFunc) {
980            LOG_API("callIndirect(%d);\n", l);
981            setR0Type(pFunc->pHead);
982            int argCount = l >> 2;
983            int poppedArgs = argCount > 4 ? 4 : argCount;
984            int adjustedL = l - (poppedArgs << 2) + mStackAlignmentAdjustment;
985            if (adjustedL < 0 || adjustedL > 4096-4) {
986                error("l out of range for stack offset: 0x%08x", l);
987            }
988            o4(0xE59DC000 | (0xfff & adjustedL)); // ldr    r12, [sp,#adjustedL]
989            o4(0xE12FFF3C); // blx r12
990        }
991
992        virtual void adjustStackAfterCall(int l, bool isIndirect) {
993            LOG_API("adjustStackAfterCall(%d, %d);\n", l, isIndirect);
994            int argCount = l >> 2;
995            int stackArgs = argCount > 4 ? argCount - 4 : 0;
996            int stackUse =  stackArgs + (isIndirect ? 1 : 0)
997                + (mStackAlignmentAdjustment >> 2);
998            if (stackUse) {
999                if (stackUse < 0 || stackUse > 255) {
1000                    error("L out of range for stack adjustment: 0x%08x", l);
1001                }
1002                o4(0xE28DDF00 | stackUse); // add    sp, sp, #stackUse << 2
1003                mStackUse -= stackUse * 4;
1004                LOG_STACK("adjustStackAfterCall: %d\n", mStackUse);
1005            }
1006        }
1007
1008        virtual int jumpOffset() {
1009            return 8;
1010        }
1011
1012        /* output a symbol and patch all calls to it */
1013        virtual void gsym(int t) {
1014            LOG_API("gsym(0x%x)\n", t);
1015            int n;
1016            int base = getBase();
1017            int pc = getPC();
1018            LOG_API("pc = 0x%x\n", pc);
1019            while (t) {
1020                int data = * (int*) t;
1021                int decodedOffset = ((BRANCH_REL_ADDRESS_MASK & data) << 2);
1022                if (decodedOffset == 0) {
1023                    n = 0;
1024                } else {
1025                    n = base + decodedOffset; /* next value */
1026                }
1027                *(int *) t = (data & ~BRANCH_REL_ADDRESS_MASK)
1028                    | encodeRelAddress(pc - t - 8);
1029                t = n;
1030            }
1031        }
1032
1033        virtual int finishCompile() {
1034#if defined(__arm__)
1035            const long base = long(getBase());
1036            const long curr = long(getPC());
1037            int err = cacheflush(base, curr, 0);
1038            return err;
1039#else
1040            return 0;
1041#endif
1042        }
1043
1044        virtual int disassemble(FILE* out) {
1045#ifdef ENABLE_ARM_DISASSEMBLY
1046            disasmOut = out;
1047            disasm_interface_t  di;
1048            di.di_readword = disassemble_readword;
1049            di.di_printaddr = disassemble_printaddr;
1050            di.di_printf = disassemble_printf;
1051
1052            int base = getBase();
1053            int pc = getPC();
1054            for(int i = base; i < pc; i += 4) {
1055                fprintf(out, "%08x: %08x  ", i, *(int*) i);
1056                ::disasm(&di, i, 0);
1057            }
1058#endif
1059            return 0;
1060        }
1061
1062        /**
1063         * alignment (in bytes) for this type of data
1064         */
1065        virtual size_t alignment(Type* pType){
1066            switch(pType->tag) {
1067                case TY_DOUBLE:
1068                    return 8;
1069                default:
1070                    return 4;
1071            }
1072        }
1073
1074        /**
1075         * Array element alignment (in bytes) for this type of data.
1076         */
1077        virtual size_t sizeOf(Type* pType){
1078            switch(pType->tag) {
1079                case TY_INT:
1080                    return 4;
1081                case TY_CHAR:
1082                    return 1;
1083                default:
1084                    return 0;
1085                case TY_FLOAT:
1086                    return 4;
1087                case TY_DOUBLE:
1088                    return 8;
1089                case TY_POINTER:
1090                    return 4;
1091            }
1092        }
1093
1094        virtual size_t stackSizeOf(Type* pType) {
1095            switch(pType->tag) {
1096                case TY_DOUBLE:
1097                    return 8;
1098                default:
1099                    return 4;
1100            }
1101        }
1102
1103    private:
1104        static FILE* disasmOut;
1105
1106        static u_int
1107        disassemble_readword(u_int address)
1108        {
1109            return(*((u_int *)address));
1110        }
1111
1112        static void
1113        disassemble_printaddr(u_int address)
1114        {
1115            fprintf(disasmOut, "0x%08x", address);
1116        }
1117
1118        static void
1119        disassemble_printf(const char *fmt, ...) {
1120            va_list ap;
1121            va_start(ap, fmt);
1122            vfprintf(disasmOut, fmt, ap);
1123            va_end(ap);
1124        }
1125
1126        static const int BRANCH_REL_ADDRESS_MASK = 0x00ffffff;
1127
1128        /** Encode a relative address that might also be
1129         * a label.
1130         */
1131        int encodeAddress(int value) {
1132            int base = getBase();
1133            if (value >= base && value <= getPC() ) {
1134                // This is a label, encode it relative to the base.
1135                value = value - base;
1136            }
1137            return encodeRelAddress(value);
1138        }
1139
1140        int encodeRelAddress(int value) {
1141            return BRANCH_REL_ADDRESS_MASK & (value >> 2);
1142        }
1143
1144        typedef int (*int2FnPtr)(int a, int b);
1145        void callRuntime(int2FnPtr fn) {
1146            o4(0xE59F2000); // ldr    r2, .L1
1147            o4(0xEA000000); // b      .L99
1148            o4((int) fn);   //.L1:  .word  fn
1149            o4(0xE12FFF32); //.L99: blx    r2
1150        }
1151
1152        static int runtime_DIV(int a, int b) {
1153            return b / a;
1154        }
1155
1156        static int runtime_MOD(int a, int b) {
1157            return b % a;
1158        }
1159
1160        static const int STACK_ALIGNMENT = 8;
1161        int mStackUse;
1162        // This variable holds the amount we adjusted the stack in the most
1163        // recent endFunctionCallArguments call. It's examined by the
1164        // following adjustStackAfterCall call.
1165        int mStackAlignmentAdjustment;
1166    };
1167
1168#endif // PROVIDE_ARM_CODEGEN
1169
1170#ifdef PROVIDE_X86_CODEGEN
1171
1172    class X86CodeGenerator : public CodeGenerator {
1173    public:
1174        X86CodeGenerator() {}
1175        virtual ~X86CodeGenerator() {}
1176
1177        /* returns address to patch with local variable size
1178        */
1179        virtual int functionEntry(int argCount) {
1180            o(0xe58955); /* push   %ebp, mov %esp, %ebp */
1181            return oad(0xec81, 0); /* sub $xxx, %esp */
1182        }
1183
1184        virtual void functionExit(int argCount, int localVariableAddress, int localVariableSize) {
1185            o(0xc3c9); /* leave, ret */
1186            *(int *) localVariableAddress = localVariableSize; /* save local variables */
1187        }
1188
1189        /* load immediate value */
1190        virtual void li(int i, Type* pType) {
1191            oad(0xb8, i); /* mov $xx, %eax */
1192            setR0Type(pType);
1193        }
1194
1195        virtual void loadFloat(int address, Type* pType) {
1196            setR0Type(pType);
1197            switch (pType->tag) {
1198            case TY_FLOAT:
1199                oad(0x05D9, address);      // flds
1200                break;
1201            case TY_DOUBLE:
1202                oad(0x05DD, address);      // fldl
1203                break;
1204            default:
1205                assert(false);
1206                break;
1207            }
1208        }
1209
1210        virtual int gjmp(int t) {
1211            return psym(0xe9, t);
1212        }
1213
1214        /* l = 0: je, l == 1: jne */
1215        virtual int gtst(bool l, int t) {
1216            Type* pR0Type = getR0Type();
1217            TypeTag tagR0 = pR0Type->tag;
1218            bool isFloatR0 = isFloatTag(tagR0);
1219            if (isFloatR0) {
1220                o(0xeed9); // fldz
1221                o(0xe9da); // fucompp
1222                o(0xe0df); // fnstsw %ax
1223                o(0x9e);   // sahf
1224            } else {
1225                o(0xc085); // test %eax, %eax
1226            }
1227            // Use two output statements to generate one instruction.
1228            o(0x0f);   // je/jne xxx
1229            return psym(0x84 + l, t);
1230        }
1231
1232        virtual void gcmp(int op, Type* pResultType) {
1233            Type* pR0Type = getR0Type();
1234            Type* pTOSType = getTOSType();
1235            TypeTag tagR0 = pR0Type->tag;
1236            TypeTag tagTOS = pTOSType->tag;
1237            bool isFloatR0 = isFloatTag(tagR0);
1238            bool isFloatTOS = isFloatTag(tagTOS);
1239            if (!isFloatR0 && !isFloatTOS) {
1240                int t = decodeOp(op);
1241                o(0x59); /* pop %ecx */
1242                o(0xc139); /* cmp %eax,%ecx */
1243                li(0, NULL);
1244                o(0x0f); /* setxx %al */
1245                o(t + 0x90);
1246                o(0xc0);
1247                popType();
1248            } else {
1249                setupFloatOperands();
1250                switch (op) {
1251                    case OP_EQUALS:
1252                        o(0xe9da);   // fucompp
1253                        o(0xe0df);   // fnstsw %ax
1254                        o(0x9e);     // sahf
1255                        o(0xc0940f); // sete %al
1256                        o(0xc29b0f); // setnp %dl
1257                        o(0xd021);   // andl %edx, %eax
1258                        break;
1259                    case OP_NOT_EQUALS:
1260                        o(0xe9da);   // fucompp
1261                        o(0xe0df);   // fnstsw %ax
1262                        o(0x9e);     // sahf
1263                        o(0xc0950f); // setne %al
1264                        o(0xc29a0f); // setp %dl
1265                        o(0xd009);   // orl %edx, %eax
1266                        break;
1267                    case OP_GREATER_EQUAL:
1268                        o(0xe9da);   // fucompp
1269                        o(0xe0df);   // fnstsw %ax
1270                        o(0x05c4f6); // testb $5, %ah
1271                        o(0xc0940f); // sete %al
1272                        break;
1273                    case OP_LESS:
1274                        o(0xc9d9);   // fxch %st(1)
1275                        o(0xe9da);   // fucompp
1276                        o(0xe0df);   // fnstsw %ax
1277                        o(0x9e);     // sahf
1278                        o(0xc0970f); // seta %al
1279                        break;
1280                    case OP_LESS_EQUAL:
1281                        o(0xc9d9);   // fxch %st(1)
1282                        o(0xe9da);   // fucompp
1283                        o(0xe0df);   // fnstsw %ax
1284                        o(0x9e);     // sahf
1285                        o(0xc0930f); // setea %al
1286                        break;
1287                    case OP_GREATER:
1288                        o(0xe9da);   // fucompp
1289                        o(0xe0df);   // fnstsw %ax
1290                        o(0x45c4f6); // testb $69, %ah
1291                        o(0xc0940f); // sete %al
1292                        break;
1293                    default:
1294                        error("Unknown comparison op");
1295                }
1296                o(0xc0b60f); // movzbl %al, %eax
1297            }
1298            setR0Type(pResultType);
1299        }
1300
1301        virtual void genOp(int op) {
1302            Type* pR0Type = getR0Type();
1303            Type* pTOSType = getTOSType();
1304            TypeTag tagR0 = pR0Type->tag;
1305            TypeTag tagTOS = pTOSType->tag;
1306            bool isFloatR0 = isFloatTag(tagR0);
1307            bool isFloatTOS = isFloatTag(tagTOS);
1308            if (!isFloatR0 && !isFloatTOS) {
1309                // TODO: Deal with pointer arithmetic
1310                o(0x59); /* pop %ecx */
1311                o(decodeOp(op));
1312                if (op == OP_MOD)
1313                    o(0x92); /* xchg %edx, %eax */
1314                popType();
1315            } else {
1316                Type* pResultType = tagR0 > tagTOS ? pR0Type : pTOSType;
1317                setupFloatOperands();
1318                // Both float. x87 R0 == left hand, x87 R1 == right hand
1319                switch (op) {
1320                    case OP_MUL:
1321                        o(0xc9de); // fmulp
1322                        break;
1323                    case OP_DIV:
1324                        o(0xf1de); // fdivp
1325                        break;
1326                    case OP_PLUS:
1327                        o(0xc1de); // faddp
1328                        break;
1329                    case OP_MINUS:
1330                        o(0xe1de); // fsubp
1331                        break;
1332                    default:
1333                        error("Unsupported binary floating operation.");
1334                        break;
1335                }
1336                popType();
1337                setR0Type(pResultType);
1338            }
1339        }
1340
1341        virtual void gUnaryCmp(int op, Type* pResultType) {
1342            if (op != OP_LOGICAL_NOT) {
1343                error("Unknown unary cmp %d", op);
1344            } else {
1345                Type* pR0Type = getR0Type();
1346                TypeTag tag = collapseType(pR0Type->tag);
1347                switch(tag) {
1348                    case TY_INT: {
1349                            oad(0xb9, 0); /* movl $0, %ecx */
1350                            int t = decodeOp(op);
1351                            o(0xc139); /* cmp %eax,%ecx */
1352                            li(0, NULL);
1353                            o(0x0f); /* setxx %al */
1354                            o(t + 0x90);
1355                            o(0xc0);
1356                        }
1357                        break;
1358                    case TY_FLOAT:
1359                    case TY_DOUBLE:
1360                        o(0xeed9);   // fldz
1361                        o(0xe9da);   // fucompp
1362                        o(0xe0df);   // fnstsw %ax
1363                        o(0x9e);     // sahf
1364                        o(0xc0950f); // setne %al
1365                        o(0xc29a0f); // setp %dl
1366                        o(0xd009);   // orl %edx, %eax
1367                        o(0xc0b60f); // movzbl %al, %eax
1368                        o(0x01f083); // xorl $1,  %eax
1369                        break;
1370                    default:
1371                        error("genUnaryCmp unsupported type");
1372                        break;
1373                }
1374            }
1375            setR0Type(pResultType);
1376        }
1377
1378        virtual void genUnaryOp(int op) {
1379            Type* pR0Type = getR0Type();
1380            TypeTag tag = collapseType(pR0Type->tag);
1381            switch(tag) {
1382                case TY_INT:
1383                    oad(0xb9, 0); /* movl $0, %ecx */
1384                    o(decodeOp(op));
1385                    break;
1386                case TY_FLOAT:
1387                case TY_DOUBLE:
1388                    switch (op) {
1389                        case OP_MINUS:
1390                            o(0xe0d9);  // fchs
1391                            break;
1392                        case OP_BIT_NOT:
1393                            error("Can't apply '~' operator to a float or double.");
1394                            break;
1395                        default:
1396                            error("Unknown unary op %d\n", op);
1397                            break;
1398                        }
1399                    break;
1400                default:
1401                    error("genUnaryOp unsupported type");
1402                    break;
1403            }
1404        }
1405
1406        virtual void pushR0() {
1407            Type* pR0Type = getR0Type();
1408            TypeTag r0ct = collapseType(pR0Type->tag);
1409            switch(r0ct) {
1410                case TY_INT:
1411                    o(0x50); /* push %eax */
1412                    break;
1413                case TY_FLOAT:
1414                    o(0x50); /* push %eax */
1415                    o(0x241cd9); // fstps 0(%esp)
1416                    break;
1417                case TY_DOUBLE:
1418                    o(0x50); /* push %eax */
1419                    o(0x50); /* push %eax */
1420                    o(0x241cdd); // fstpl 0(%esp)
1421                    break;
1422                default:
1423                    error("pushR0 unsupported type %d", r0ct);
1424                    break;
1425            }
1426            pushType();
1427        }
1428
1429        virtual void storeR0ToTOS(Type* pPointerType) {
1430            assert(pPointerType->tag == TY_POINTER);
1431            o(0x59); /* pop %ecx */
1432            popType();
1433            switch (pPointerType->pHead->tag) {
1434                case TY_INT:
1435                    o(0x0189); /* movl %eax/%al, (%ecx) */
1436                    break;
1437                case TY_CHAR:
1438                    o(0x0188); /* movl %eax/%al, (%ecx) */
1439                    break;
1440                case TY_FLOAT:
1441                    o(0x19d9); /* fstps (%ecx) */
1442                    break;
1443                case TY_DOUBLE:
1444                    o(0x19dd); /* fstpl (%ecx) */
1445                    break;
1446                default:
1447                    error("storeR0ToTOS: unsupported type");
1448                    break;
1449            }
1450        }
1451
1452        virtual void loadR0FromR0(Type* pPointerType) {
1453            assert(pPointerType->tag == TY_POINTER);
1454            switch (pPointerType->pHead->tag) {
1455                case TY_INT:
1456                    o2(0x008b); /* mov (%eax), %eax */
1457                    break;
1458                case TY_CHAR:
1459                    o(0xbe0f); /* movsbl (%eax), %eax */
1460                    ob(0); /* add zero in code */
1461                    break;
1462                case TY_FLOAT:
1463                    o2(0x00d9); // flds (%eax)
1464                    break;
1465                case TY_DOUBLE:
1466                    o2(0x00dd); // fldl (%eax)
1467                    break;
1468                default:
1469                    error("loadR0FromR0: unsupported type");
1470                    break;
1471            }
1472            setR0Type(pPointerType->pHead);
1473        }
1474
1475        virtual void leaR0(int ea, Type* pPointerType) {
1476            gmov(10, ea); /* leal EA, %eax */
1477            setR0Type(pPointerType);
1478        }
1479
1480        virtual void storeR0(int ea, Type* pType) {
1481            TypeTag tag = pType->tag;
1482            switch (tag) {
1483                case TY_INT:
1484                    gmov(6, ea); /* mov %eax, EA */
1485                    break;
1486                case TY_FLOAT:
1487                    if (ea < -LOCAL || ea > LOCAL) {
1488                        oad(0x1dd9, ea); // fstps ea
1489                    } else {
1490                        oad(0x9dd9, ea); // fstps ea(%ebp)
1491                    }
1492                    break;
1493                case TY_DOUBLE:
1494                    if (ea < -LOCAL || ea > LOCAL) {
1495                        oad(0x1ddd, ea); // fstpl ea
1496                    } else {
1497                        oad(0x9ddd, ea); // fstpl ea(%ebp)
1498                    }
1499                    break;
1500                default:
1501                    error("Unable to store to type %d", tag);
1502                    break;
1503            }
1504        }
1505
1506        virtual void loadR0(int ea, bool isIncDec, int op, Type* pType) {
1507            TypeTag tag = collapseType(pType->tag);
1508            switch (tag) {
1509                case TY_INT:
1510                    gmov(8, ea); /* mov EA, %eax */
1511                    if (isIncDec) {
1512                        /* Implement post-increment or post decrement.
1513                         */
1514                        gmov(0, ea); /* 83 ADD */
1515                        o(decodeOp(op));
1516                    }
1517                    break;
1518                case TY_FLOAT:
1519                    if (ea < -LOCAL || ea > LOCAL) {
1520                        oad(0x05d9, ea); // flds ea
1521                    } else {
1522                        oad(0x85d9, ea); // flds ea(%ebp)
1523                    }
1524                    if (isIncDec) {
1525                        error("inc/dec not implemented for float.");
1526                    }
1527                    break;
1528                case TY_DOUBLE:
1529                    if (ea < -LOCAL || ea > LOCAL) {
1530                        oad(0x05dd, ea); // fldl ea
1531                    } else {
1532                        oad(0x85dd, ea); // fldl ea(%ebp)
1533                    }
1534                    if (isIncDec) {
1535                        error("inc/dec not implemented for double.");
1536                    }
1537                    break;
1538                default:
1539                    error("Unable to load type %d", tag);
1540                    break;
1541            }
1542            setR0Type(pType);
1543        }
1544
1545        virtual void convertR0(Type* pType){
1546            Type* pR0Type = getR0Type();
1547            if (pR0Type == NULL) {
1548                assert(false);
1549                setR0Type(pType);
1550                return;
1551            }
1552            if (bitsSame(pType, pR0Type)) {
1553                // do nothing special
1554            } else if (isFloatType(pType) && isFloatType(pR0Type)) {
1555                // do nothing special, both held in same register on x87.
1556            } else {
1557                TypeTag r0Tag = collapseType(pR0Type->tag);
1558                TypeTag destTag = collapseType(pType->tag);
1559                if (r0Tag == TY_INT && isFloatTag(destTag)) {
1560                    // Convert R0 from int to float
1561                    o(0x50);      // push %eax
1562                    o(0x2404DB);  // fildl 0(%esp)
1563                    o(0x58);      // pop %eax
1564                } else if (isFloatTag(r0Tag) && destTag == TY_INT) {
1565                    // Convert R0 from float to int. Complicated because
1566                    // need to save and restore the rounding mode.
1567                    o(0x50);       // push %eax
1568                    o(0x50);       // push %eax
1569                    o(0x02247cD9); // fnstcw 2(%esp)
1570                    o(0x2444b70f); // movzwl 2(%esp), %eax
1571                    o(0x02);
1572                    o(0x0cb4);     // movb $12, %ah
1573                    o(0x24048966); // movw %ax, 0(%esp)
1574                    o(0x242cd9);   // fldcw 0(%esp)
1575                    o(0x04245cdb); // fistpl 4(%esp)
1576                    o(0x02246cd9); // fldcw  2(%esp)
1577                    o(0x58); // pop %eax
1578                    o(0x58); // pop %eax
1579                } else {
1580                    error("Incompatible types old: %d new: %d",
1581                          pR0Type->tag, pType->tag);
1582                }
1583            }
1584            setR0Type(pType);
1585        }
1586
1587        virtual int beginFunctionCallArguments() {
1588            return oad(0xec81, 0); /* sub $xxx, %esp */
1589        }
1590
1591        virtual size_t storeR0ToArg(int l) {
1592            Type* pR0Type = getR0Type();
1593            TypeTag r0ct = collapseType(pR0Type->tag);
1594            switch(r0ct) {
1595                case TY_INT:
1596                    oad(0x248489, l); /* movl %eax, xxx(%esp) */
1597                    return 4;
1598                case TY_FLOAT:
1599                    oad(0x249CD9, l); /* fstps   xxx(%esp) */
1600                    return 4;
1601                case TY_DOUBLE:
1602                    oad(0x249CDD, l); /* fstpl   xxx(%esp) */
1603                    return 8;
1604                default:
1605                    assert(false);
1606                    return 0;
1607            }
1608        }
1609
1610        virtual void endFunctionCallArguments(int a, int l) {
1611            * (int*) a = l;
1612        }
1613
1614        virtual int callForward(int symbol, Type* pFunc) {
1615            setR0Type(pFunc->pHead);
1616            return psym(0xe8, symbol); /* call xxx */
1617        }
1618
1619        virtual void callRelative(int t, Type* pFunc) {
1620            setR0Type(pFunc->pHead);
1621            psym(0xe8, t); /* call xxx */
1622        }
1623
1624        virtual void callIndirect(int l, Type* pFunc) {
1625            setR0Type(pFunc->pHead);
1626            oad(0x2494ff, l); /* call *xxx(%esp) */
1627        }
1628
1629        virtual void adjustStackAfterCall(int l, bool isIndirect) {
1630            if (isIndirect) {
1631                l += 4;
1632            }
1633            if (l > 0) {
1634                oad(0xc481, l); /* add $xxx, %esp */
1635            }
1636        }
1637
1638        virtual int jumpOffset() {
1639            return 5;
1640        }
1641
1642        virtual int disassemble(FILE* out) {
1643            return 0;
1644        }
1645
1646        /* output a symbol and patch all calls to it */
1647        virtual void gsym(int t) {
1648            int n;
1649            int pc = getPC();
1650            while (t) {
1651                n = *(int *) t; /* next value */
1652                *(int *) t = pc - t - 4;
1653                t = n;
1654            }
1655        }
1656
1657        virtual int finishCompile() {
1658            size_t pagesize = 4096;
1659            size_t base = (size_t) getBase() & ~ (pagesize - 1);
1660            size_t top =  ((size_t) getPC() + pagesize - 1) & ~ (pagesize - 1);
1661            int err = mprotect((void*) base, top - base, PROT_READ | PROT_WRITE | PROT_EXEC);
1662            if (err) {
1663               error("mprotect() failed: %d", errno);
1664            }
1665            return err;
1666        }
1667
1668        /**
1669         * Alignment (in bytes) for this type of data
1670         */
1671        virtual size_t alignment(Type* pType){
1672            switch(pType->tag) {
1673                case TY_DOUBLE:
1674                    return 8;
1675                default:
1676                    return 4;
1677            }
1678        }
1679
1680        /**
1681         * Array element alignment (in bytes) for this type of data.
1682         */
1683        virtual size_t sizeOf(Type* pType){
1684            switch(pType->tag) {
1685                case TY_INT:
1686                    return 4;
1687                case TY_CHAR:
1688                    return 1;
1689                default:
1690                    return 0;
1691                case TY_FLOAT:
1692                    return 4;
1693                case TY_DOUBLE:
1694                    return 8;
1695                case TY_POINTER:
1696                    return 4;
1697            }
1698        }
1699
1700        virtual size_t stackSizeOf(Type* pType) {
1701            switch(pType->tag) {
1702                case TY_DOUBLE:
1703                    return 8;
1704                default:
1705                    return 4;
1706            }
1707        }
1708
1709    private:
1710
1711        /** Output 1 to 4 bytes.
1712         *
1713         */
1714        void o(int n) {
1715            /* cannot use unsigned, so we must do a hack */
1716            while (n && n != -1) {
1717                ob(n & 0xff);
1718                n = n >> 8;
1719            }
1720        }
1721
1722        /* Output exactly 2 bytes
1723         */
1724        void o2(int n) {
1725            ob(n & 0xff);
1726            ob(0xff & (n >> 8));
1727        }
1728
1729        /* psym is used to put an instruction with a data field which is a
1730         reference to a symbol. It is in fact the same as oad ! */
1731        int psym(int n, int t) {
1732            return oad(n, t);
1733        }
1734
1735        /* instruction + address */
1736        int oad(int n, int t) {
1737            o(n);
1738            int result = getPC();
1739            o4(t);
1740            return result;
1741        }
1742
1743
1744        static const int operatorHelper[];
1745
1746        int decodeOp(int op) {
1747            if (op < 0 || op > OP_COUNT) {
1748                error("Out-of-range operator: %d\n", op);
1749                op = 0;
1750            }
1751            return operatorHelper[op];
1752        }
1753
1754        void gmov(int l, int t) {
1755            o(l + 0x83);
1756            oad((t > -LOCAL && t < LOCAL) << 7 | 5, t);
1757        }
1758
1759        void setupFloatOperands() {
1760            Type* pR0Type = getR0Type();
1761            Type* pTOSType = getTOSType();
1762            TypeTag tagR0 = pR0Type->tag;
1763            TypeTag tagTOS = pTOSType->tag;
1764            bool isFloatR0 = isFloatTag(tagR0);
1765            bool isFloatTOS = isFloatTag(tagTOS);
1766            if (! isFloatR0) {
1767                // Convert R0 from int to float
1768                o(0x50);      // push %eax
1769                o(0x2404DB);  // fildl 0(%esp)
1770                o(0x58);      // pop %eax
1771            }
1772            if (! isFloatTOS){
1773                o(0x2404DB);  // fildl 0(%esp);
1774                o(0x58);      // pop %eax
1775            } else {
1776                if (tagTOS == TY_FLOAT) {
1777                    o(0x2404d9);  // flds (%esp)
1778                    o(0x58);      // pop %eax
1779                } else {
1780                    o(0x2404dd);  // fldl (%esp)
1781                    o(0x58);      // pop %eax
1782                    o(0x58);      // pop %eax
1783                }
1784            }
1785        }
1786    };
1787
1788#endif // PROVIDE_X86_CODEGEN
1789
1790#ifdef PROVIDE_TRACE_CODEGEN
1791    class TraceCodeGenerator : public CodeGenerator {
1792    private:
1793        CodeGenerator* mpBase;
1794
1795    public:
1796        TraceCodeGenerator(CodeGenerator* pBase) {
1797            mpBase = pBase;
1798        }
1799
1800        virtual ~TraceCodeGenerator() {
1801            delete mpBase;
1802        }
1803
1804        virtual void init(CodeBuf* pCodeBuf) {
1805            mpBase->init(pCodeBuf);
1806        }
1807
1808        void setErrorSink(ErrorSink* pErrorSink) {
1809            mpBase->setErrorSink(pErrorSink);
1810        }
1811
1812        /* returns address to patch with local variable size
1813        */
1814        virtual int functionEntry(int argCount) {
1815            int result = mpBase->functionEntry(argCount);
1816            fprintf(stderr, "functionEntry(%d) -> %d\n", argCount, result);
1817            return result;
1818        }
1819
1820        virtual void functionExit(int argCount, int localVariableAddress, int localVariableSize) {
1821            fprintf(stderr, "functionExit(%d, %d, %d)\n",
1822                    argCount, localVariableAddress, localVariableSize);
1823            mpBase->functionExit(argCount, localVariableAddress, localVariableSize);
1824        }
1825
1826        /* load immediate value */
1827        virtual void li(int t, Type* pType) {
1828            fprintf(stderr, "li(%d)\n", t);
1829            mpBase->li(t, pType);
1830        }
1831
1832        virtual void loadFloat(int address, Type* pType) {
1833            fprintf(stderr, "loadFloat(%d, type)\n", address);
1834            mpBase->loadFloat(address, pType);
1835        }
1836
1837        virtual int gjmp(int t) {
1838            int result = mpBase->gjmp(t);
1839            fprintf(stderr, "gjmp(%d) = %d\n", t, result);
1840            return result;
1841        }
1842
1843        /* l = 0: je, l == 1: jne */
1844        virtual int gtst(bool l, int t) {
1845            int result = mpBase->gtst(l, t);
1846            fprintf(stderr, "gtst(%d,%d) = %d\n", l, t, result);
1847            return result;
1848        }
1849
1850        virtual void gcmp(int op, Type* pResultType) {
1851            fprintf(stderr, "gcmp(%d, pResultType)\n", op);
1852            mpBase->gcmp(op, pResultType);
1853        }
1854
1855        virtual void genOp(int op) {
1856            fprintf(stderr, "genOp(%d)\n", op);
1857            mpBase->genOp(op);
1858        }
1859
1860
1861        virtual void gUnaryCmp(int op, Type* pResultType) {
1862            fprintf(stderr, "gUnaryCmp(%d, pResultType)\n", op);
1863            mpBase->gUnaryCmp(op, pResultType);
1864        }
1865
1866        virtual void genUnaryOp(int op) {
1867            fprintf(stderr, "genUnaryOp(%d)\n", op);
1868            mpBase->genUnaryOp(op);
1869        }
1870
1871        virtual void pushR0() {
1872            fprintf(stderr, "pushR0()\n");
1873            mpBase->pushR0();
1874        }
1875
1876        virtual void storeR0ToTOS(Type* pPointerType) {
1877            fprintf(stderr, "storeR0ToTOS(%d)\n", pPointerType->pHead->tag);
1878            mpBase->storeR0ToTOS(pPointerType);
1879        }
1880
1881        virtual void loadR0FromR0(Type* pPointerType) {
1882            fprintf(stderr, "loadR0FromR0(%d)\n", pPointerType->pHead->tag);
1883            mpBase->loadR0FromR0(pPointerType);
1884        }
1885
1886        virtual void leaR0(int ea, Type* pPointerType) {
1887            fprintf(stderr, "leaR0(%d)\n", ea);
1888            mpBase->leaR0(ea, pPointerType);
1889        }
1890
1891        virtual void storeR0(int ea, Type* pType) {
1892            fprintf(stderr, "storeR0(%d, pType)\n", ea);
1893            mpBase->storeR0(ea, pType);
1894        }
1895
1896        virtual void loadR0(int ea, bool isIncDec, int op, Type* pType) {
1897            fprintf(stderr, "loadR0(%d, %d, %d, pType)\n", ea, isIncDec, op);
1898            mpBase->loadR0(ea, isIncDec, op, pType);
1899        }
1900
1901        virtual void convertR0(Type* pType){
1902            fprintf(stderr, "convertR0(pType)\n");
1903            mpBase->convertR0(pType);
1904        }
1905
1906        virtual int beginFunctionCallArguments() {
1907            int result = mpBase->beginFunctionCallArguments();
1908            fprintf(stderr, "beginFunctionCallArguments() = %d\n", result);
1909            return result;
1910        }
1911
1912        virtual size_t storeR0ToArg(int l) {
1913            fprintf(stderr, "storeR0ToArg(%d)\n", l);
1914            return mpBase->storeR0ToArg(l);
1915        }
1916
1917        virtual void endFunctionCallArguments(int a, int l) {
1918            fprintf(stderr, "endFunctionCallArguments(%d, %d)\n", a, l);
1919            mpBase->endFunctionCallArguments(a, l);
1920        }
1921
1922        virtual int callForward(int symbol, Type* pFunc) {
1923            int result = mpBase->callForward(symbol, pFunc);
1924            fprintf(stderr, "callForward(%d) = %d\n", symbol, result);
1925            return result;
1926        }
1927
1928        virtual void callRelative(int t, Type* pFunc) {
1929            fprintf(stderr, "callRelative(%d)\n", t);
1930            mpBase->callRelative(t, pFunc);
1931        }
1932
1933        virtual void callIndirect(int l, Type* pFunc) {
1934            fprintf(stderr, "callIndirect(%d)\n", l);
1935            mpBase->callIndirect(l, pFunc);
1936        }
1937
1938        virtual void adjustStackAfterCall(int l, bool isIndirect) {
1939            fprintf(stderr, "adjustStackAfterCall(%d, %d)\n", l, isIndirect);
1940            mpBase->adjustStackAfterCall(l, isIndirect);
1941        }
1942
1943        virtual int jumpOffset() {
1944            return mpBase->jumpOffset();
1945        }
1946
1947        virtual int disassemble(FILE* out) {
1948            return mpBase->disassemble(out);
1949        }
1950
1951        /* output a symbol and patch all calls to it */
1952        virtual void gsym(int t) {
1953            fprintf(stderr, "gsym(%d)\n", t);
1954            mpBase->gsym(t);
1955        }
1956
1957        virtual int finishCompile() {
1958            int result = mpBase->finishCompile();
1959            fprintf(stderr, "finishCompile() = %d\n", result);
1960            return result;
1961        }
1962
1963        /**
1964         * Alignment (in bytes) for this type of data
1965         */
1966        virtual size_t alignment(Type* pType){
1967            return mpBase->alignment(pType);
1968        }
1969
1970        /**
1971         * Array element alignment (in bytes) for this type of data.
1972         */
1973        virtual size_t sizeOf(Type* pType){
1974            return mpBase->sizeOf(pType);
1975        }
1976
1977
1978        virtual size_t stackSizeOf(Type* pType) {
1979            return mpBase->stackSizeOf(pType);
1980        }
1981
1982
1983        virtual Type* getR0Type() {
1984            return mpBase->getR0Type();
1985        }
1986    };
1987
1988#endif // PROVIDE_TRACE_CODEGEN
1989
1990    class Arena {
1991    public:
1992        // Used to record a given allocation amount.
1993        // Used:
1994        // Mark mark = arena.mark();
1995        // ... lots of arena.allocate()
1996        // arena.free(mark);
1997
1998        struct Mark {
1999            size_t chunk;
2000            size_t offset;
2001        };
2002
2003        Arena() {
2004            mCurrentChunk = 0;
2005            Chunk start(CHUNK_SIZE);
2006            mData.push_back(start);
2007        }
2008
2009        ~Arena() {
2010            for(size_t i = 0; i < mData.size(); i++) {
2011                mData[i].free();
2012            }
2013        }
2014
2015        // Alloc using the standard alignment size safe for any variable
2016        void* alloc(size_t size) {
2017            return alloc(size, 8);
2018        }
2019
2020        Mark mark(){
2021            Mark result;
2022            result.chunk = mCurrentChunk;
2023            result.offset = mData[mCurrentChunk].mOffset;
2024            return result;
2025        }
2026
2027        void freeToMark(const Mark& mark) {
2028            mCurrentChunk = mark.chunk;
2029            mData[mCurrentChunk].mOffset = mark.offset;
2030        }
2031
2032    private:
2033        // Allocate memory aligned to a given size
2034        // and a given power-of-two-sized alignment (e.g. 1,2,4,8,...)
2035        // Memory is not zero filled.
2036
2037        void* alloc(size_t size, size_t alignment) {
2038            while (size > mData[mCurrentChunk].remainingCapacity(alignment)) {
2039                if (mCurrentChunk + 1 < mData.size()) {
2040                    mCurrentChunk++;
2041                } else {
2042                    size_t allocSize = CHUNK_SIZE;
2043                    if (allocSize < size + alignment - 1) {
2044                        allocSize = size + alignment - 1;
2045                    }
2046                    Chunk chunk(allocSize);
2047                    mData.push_back(chunk);
2048                    mCurrentChunk++;
2049                }
2050            }
2051            return mData[mCurrentChunk].allocate(size, alignment);
2052        }
2053
2054        static const size_t CHUNK_SIZE = 128*1024;
2055        // Note: this class does not deallocate its
2056        // memory when it's destroyed. It depends upon
2057        // its parent to deallocate the memory.
2058        struct Chunk {
2059            Chunk() {
2060                mpData = 0;
2061                mSize = 0;
2062                mOffset = 0;
2063            }
2064
2065            Chunk(size_t size) {
2066                mSize = size;
2067                mpData = (char*) malloc(size);
2068                mOffset = 0;
2069            }
2070
2071            ~Chunk() {
2072                // Doesn't deallocate memory.
2073            }
2074
2075            void* allocate(size_t size, size_t alignment) {
2076                size_t alignedOffset = aligned(mOffset, alignment);
2077                void* result = mpData + alignedOffset;
2078                mOffset = alignedOffset + size;
2079                return result;
2080            }
2081
2082            void free() {
2083                if (mpData) {
2084                    ::free(mpData);
2085                    mpData = 0;
2086                }
2087            }
2088
2089            size_t remainingCapacity(size_t alignment) {
2090                return aligned(mSize, alignment) - aligned(mOffset, alignment);
2091            }
2092
2093            // Assume alignment is a power of two
2094            inline size_t aligned(size_t v, size_t alignment) {
2095                size_t mask = alignment-1;
2096                return (v + mask) & ~mask;
2097            }
2098
2099            char* mpData;
2100            size_t mSize;
2101            size_t mOffset;
2102        };
2103
2104        size_t mCurrentChunk;
2105
2106        Vector<Chunk> mData;
2107    };
2108
2109    struct VariableInfo;
2110
2111    struct Token {
2112        int hash;
2113        size_t length;
2114        char* pText;
2115        tokenid_t id;
2116
2117        // Current values for the token
2118        char* mpMacroDefinition;
2119        VariableInfo* mpVariableInfo;
2120    };
2121
2122    class TokenTable {
2123    public:
2124        // Don't use 0..0xff, allows characters and operators to be tokens too.
2125
2126        static const int TOKEN_BASE = 0x100;
2127        TokenTable() {
2128            mpMap = hashmapCreate(128, hashFn, equalsFn);
2129        }
2130
2131        ~TokenTable() {
2132            hashmapFree(mpMap);
2133        }
2134
2135        void setArena(Arena* pArena) {
2136            mpArena = pArena;
2137        }
2138
2139        // Returns a token for a given string of characters.
2140        tokenid_t intern(const char* pText, size_t length) {
2141            Token probe;
2142            int hash = hashmapHash((void*) pText, length);
2143            {
2144                Token probe;
2145                probe.hash = hash;
2146                probe.length = length;
2147                probe.pText = (char*) pText;
2148                Token* pValue = (Token*) hashmapGet(mpMap, &probe);
2149                if (pValue) {
2150                    return pValue->id;
2151                }
2152            }
2153
2154            Token* pToken = (Token*) mpArena->alloc(sizeof(Token));
2155            memset(pToken, 0, sizeof(*pToken));
2156            pToken->hash = hash;
2157            pToken->length = length;
2158            pToken->pText = (char*) mpArena->alloc(length + 1);
2159            memcpy(pToken->pText, pText, length);
2160            pToken->pText[length] = 0;
2161            pToken->id = mTokens.size() + TOKEN_BASE;
2162            mTokens.push_back(pToken);
2163            hashmapPut(mpMap, pToken, pToken);
2164            return pToken->id;
2165        }
2166
2167        // Return the Token for a given tokenid.
2168        Token& operator[](tokenid_t id) {
2169            return *mTokens[id - TOKEN_BASE];
2170        }
2171
2172        inline size_t size() {
2173            return mTokens.size();
2174        }
2175
2176    private:
2177
2178        static int hashFn(void* pKey) {
2179            Token* pToken = (Token*) pKey;
2180            return pToken->hash;
2181        }
2182
2183        static bool equalsFn(void* keyA, void* keyB) {
2184            Token* pTokenA = (Token*) keyA;
2185            Token* pTokenB = (Token*) keyB;
2186            // Don't need to compare hash values, they should always be equal
2187            return pTokenA->length == pTokenB->length
2188                && strcmp(pTokenA->pText, pTokenB->pText) == 0;
2189        }
2190
2191        Hashmap* mpMap;
2192        Vector<Token*> mTokens;
2193        Arena* mpArena;
2194    };
2195
2196    class InputStream {
2197    public:
2198        virtual ~InputStream() {}
2199        int getChar() {
2200            if (bumpLine) {
2201                line++;
2202                bumpLine = false;
2203            }
2204            int ch = get();
2205            if (ch == '\n') {
2206                bumpLine = true;
2207            }
2208            return ch;
2209        }
2210        int getLine() {
2211            return line;
2212        }
2213    protected:
2214        InputStream() :
2215            line(1), bumpLine(false) {
2216        }
2217    private:
2218        virtual int get() = 0;
2219        int line;
2220        bool bumpLine;
2221    };
2222
2223    class FileInputStream : public InputStream {
2224    public:
2225        FileInputStream(FILE* in) : f(in) {}
2226    private:
2227        virtual int get() { return fgetc(f); }
2228        FILE* f;
2229    };
2230
2231    class TextInputStream : public InputStream {
2232    public:
2233        TextInputStream(const char* text, size_t textLength)
2234            : pText(text), mTextLength(textLength), mPosition(0) {
2235        }
2236
2237    private:
2238        virtual int get() {
2239            return mPosition < mTextLength ? pText[mPosition++] : EOF;
2240        }
2241
2242        const char* pText;
2243        size_t mTextLength;
2244        size_t mPosition;
2245    };
2246
2247    class String {
2248    public:
2249        String() {
2250            mpBase = 0;
2251            mUsed = 0;
2252            mSize = 0;
2253        }
2254
2255        String(const char* item, int len, bool adopt) {
2256            if (len < 0) {
2257                len = strlen(item);
2258            }
2259            if (adopt) {
2260                mpBase = (char*) item;
2261                mUsed = len;
2262                mSize = len + 1;
2263            } else {
2264                mpBase = 0;
2265                mUsed = 0;
2266                mSize = 0;
2267                appendBytes(item, len);
2268            }
2269        }
2270
2271        String(const String& other) {
2272            mpBase = 0;
2273            mUsed = 0;
2274            mSize = 0;
2275            appendBytes(other.getUnwrapped(), other.len());
2276        }
2277
2278        ~String() {
2279            if (mpBase) {
2280                free(mpBase);
2281            }
2282        }
2283
2284        String& operator=(const String& other) {
2285            clear();
2286            appendBytes(other.getUnwrapped(), other.len());
2287            return *this;
2288        }
2289
2290        inline char* getUnwrapped() const {
2291            return mpBase;
2292        }
2293
2294        void clear() {
2295            mUsed = 0;
2296            if (mSize > 0) {
2297                mpBase[0] = 0;
2298            }
2299        }
2300
2301        void appendCStr(const char* s) {
2302            appendBytes(s, strlen(s));
2303        }
2304
2305        void appendBytes(const char* s, int n) {
2306            memcpy(ensure(n), s, n + 1);
2307        }
2308
2309        void append(char c) {
2310            * ensure(1) = c;
2311        }
2312
2313        void append(String& other) {
2314            appendBytes(other.getUnwrapped(), other.len());
2315        }
2316
2317        char* orphan() {
2318            char* result = mpBase;
2319            mpBase = 0;
2320            mUsed = 0;
2321            mSize = 0;
2322            return result;
2323        }
2324
2325        void printf(const char* fmt,...) {
2326            va_list ap;
2327            va_start(ap, fmt);
2328            vprintf(fmt, ap);
2329            va_end(ap);
2330        }
2331
2332        void vprintf(const char* fmt, va_list ap) {
2333            char* temp;
2334            int numChars = vasprintf(&temp, fmt, ap);
2335            memcpy(ensure(numChars), temp, numChars+1);
2336            free(temp);
2337        }
2338
2339        inline size_t len() const {
2340            return mUsed;
2341        }
2342
2343    private:
2344        char* ensure(int n) {
2345            size_t newUsed = mUsed + n;
2346            if (newUsed > mSize) {
2347                size_t newSize = mSize * 2 + 10;
2348                if (newSize < newUsed) {
2349                    newSize = newUsed;
2350                }
2351                mpBase = (char*) realloc(mpBase, newSize + 1);
2352                mSize = newSize;
2353            }
2354            mpBase[newUsed] = '\0';
2355            char* result = mpBase + mUsed;
2356            mUsed = newUsed;
2357            return result;
2358        }
2359
2360        char* mpBase;
2361        size_t mUsed;
2362        size_t mSize;
2363    };
2364
2365    void internKeywords() {
2366        // Note: order has to match TOK_ constants
2367        static const char* keywords[] = {
2368            "int",
2369            "char",
2370            "void",
2371            "if",
2372            "else",
2373            "while",
2374            "break",
2375            "return",
2376            "for",
2377            "pragma",
2378            "define",
2379            "auto",
2380            "case",
2381            "const",
2382            "continue",
2383            "default",
2384            "do",
2385            "double",
2386            "enum",
2387            "extern",
2388            "float",
2389            "goto",
2390            "long",
2391            "register",
2392            "short",
2393            "signed",
2394            "sizeof",
2395            "static",
2396            "struct",
2397            "switch",
2398            "typedef",
2399            "union",
2400            "unsigned",
2401            "volatile",
2402            "_Bool",
2403            "_Complex",
2404            "_Imaginary",
2405            "inline",
2406            "restrict",
2407            0};
2408
2409        for(int i = 0; keywords[i]; i++) {
2410            mTokenTable.intern(keywords[i], strlen(keywords[i]));
2411        }
2412    }
2413
2414    struct InputState {
2415        InputStream* pStream;
2416        int oldCh;
2417    };
2418
2419    struct VariableInfo {
2420        void* pAddress;
2421        void* pForward; // For a forward direction, linked list of data to fix up
2422        tokenid_t tok;
2423        size_t level;
2424        VariableInfo* pOldDefinition;
2425        Type* pType;
2426    };
2427
2428    class SymbolStack {
2429    public:
2430        SymbolStack() {
2431            mpArena = 0;
2432            mpTokenTable = 0;
2433        }
2434
2435        void setArena(Arena* pArena) {
2436            mpArena = pArena;
2437        }
2438
2439        void setTokenTable(TokenTable* pTokenTable) {
2440            mpTokenTable = pTokenTable;
2441        }
2442
2443        void pushLevel() {
2444            Mark mark;
2445            mark.mArenaMark = mpArena->mark();
2446            mark.mSymbolHead = mStack.size();
2447            mLevelStack.push_back(mark);
2448        }
2449
2450        void popLevel() {
2451            // Undo any shadowing that was done:
2452            Mark mark = mLevelStack.back();
2453            mLevelStack.pop_back();
2454            while (mStack.size() > mark.mSymbolHead) {
2455                VariableInfo* pV = mStack.back();
2456                mStack.pop_back();
2457                (*mpTokenTable)[pV->tok].mpVariableInfo = pV->pOldDefinition;
2458            }
2459            mpArena->freeToMark(mark.mArenaMark);
2460        }
2461
2462        bool isDefinedAtCurrentLevel(tokenid_t tok) {
2463            VariableInfo* pV = (*mpTokenTable)[tok].mpVariableInfo;
2464            return pV && pV->level == level();
2465        }
2466
2467        VariableInfo* add(tokenid_t tok) {
2468            Token& token = (*mpTokenTable)[tok];
2469            VariableInfo* pOldV = token.mpVariableInfo;
2470            VariableInfo* pNewV =
2471                (VariableInfo*) mpArena->alloc(sizeof(VariableInfo));
2472            memset(pNewV, 0, sizeof(VariableInfo));
2473            pNewV->tok = tok;
2474            pNewV->level = level();
2475            pNewV->pOldDefinition = pOldV;
2476            token.mpVariableInfo = pNewV;
2477            mStack.push_back(pNewV);
2478            return pNewV;
2479        }
2480
2481        VariableInfo* add(Type* pType) {
2482            VariableInfo* pVI = add(pType->id);
2483            pVI->pType = pType;
2484            return pVI;
2485        }
2486
2487        void forEach(bool (*fn)(VariableInfo*, void*), void* context) {
2488            for (size_t i = 0; i < mStack.size(); i++) {
2489                if (! fn(mStack[i], context)) {
2490                    break;
2491                }
2492            }
2493        }
2494
2495    private:
2496        inline size_t level() {
2497            return mLevelStack.size();
2498        }
2499
2500        struct Mark {
2501            Arena::Mark mArenaMark;
2502            size_t mSymbolHead;
2503        };
2504
2505        Arena* mpArena;
2506        TokenTable* mpTokenTable;
2507        Vector<VariableInfo*> mStack;
2508        Vector<Mark> mLevelStack;
2509    };
2510
2511    int ch; // Current input character, or EOF
2512    tokenid_t tok;      // token
2513    intptr_t tokc;    // token extra info
2514    double tokd;     // floating point constant value
2515    int tokl;         // token operator level
2516    intptr_t rsym; // return symbol
2517    Type* pReturnType; // type of the current function's return.
2518    intptr_t loc; // local variable index
2519    char* glo;  // global variable index
2520    String mTokenString;
2521    char* dptr; // Macro state: Points to macro text during macro playback.
2522    int dch;    // Macro state: Saves old value of ch during a macro playback.
2523    char* pGlobalBase;
2524
2525    // Arena for the duration of the compile
2526    Arena mGlobalArena;
2527    // Arena for data that's only needed when compiling a single function
2528    Arena mLocalArena;
2529
2530    TokenTable mTokenTable;
2531    SymbolStack mGlobals;
2532    SymbolStack mLocals;
2533
2534    // Prebuilt types, makes things slightly faster.
2535    Type* mkpInt;        // int
2536    Type* mkpChar;       // char
2537    Type* mkpVoid;       // void
2538    Type* mkpFloat;
2539    Type* mkpDouble;
2540    Type* mkpIntFn;
2541    Type* mkpIntPtr;
2542    Type* mkpCharPtr;
2543    Type* mkpFloatPtr;
2544    Type* mkpDoublePtr;
2545    Type* mkpPtrIntFn;
2546
2547    InputStream* file;
2548
2549    CodeBuf codeBuf;
2550    CodeGenerator* pGen;
2551
2552    String mErrorBuf;
2553
2554    String mPragmas;
2555    int mPragmaStringCount;
2556
2557    static const int ALLOC_SIZE = 99999;
2558
2559    static const int TOK_DUMMY = 1;
2560    static const int TOK_NUM = 2;
2561    static const int TOK_NUM_FLOAT = 3;
2562    static const int TOK_NUM_DOUBLE = 4;
2563
2564    // 3..255 are character and/or operators
2565
2566    // Keywords start at 0x100 and increase by 1
2567    // Order has to match string list in "internKeywords".
2568    enum {
2569        TOK_KEYWORD = TokenTable::TOKEN_BASE,
2570        TOK_INT = TOK_KEYWORD,
2571        TOK_CHAR,
2572        TOK_VOID,
2573        TOK_IF,
2574        TOK_ELSE,
2575        TOK_WHILE,
2576        TOK_BREAK,
2577        TOK_RETURN,
2578        TOK_FOR,
2579        TOK_PRAGMA,
2580        TOK_DEFINE,
2581        TOK_AUTO,
2582        TOK_CASE,
2583        TOK_CONST,
2584        TOK_CONTINUE,
2585        TOK_DEFAULT,
2586        TOK_DO,
2587        TOK_DOUBLE,
2588        TOK_ENUM,
2589        TOK_EXTERN,
2590        TOK_FLOAT,
2591        TOK_GOTO,
2592        TOK_LONG,
2593        TOK_REGISTER,
2594        TOK_SHORT,
2595        TOK_SIGNED,
2596        TOK_SIZEOF,
2597        TOK_STATIC,
2598        TOK_STRUCT,
2599        TOK_SWITCH,
2600        TOK_TYPEDEF,
2601        TOK_UNION,
2602        TOK_UNSIGNED,
2603        TOK_VOLATILE,
2604        TOK__BOOL,
2605        TOK__COMPLEX,
2606        TOK__IMAGINARY,
2607        TOK_INLINE,
2608        TOK_RESTRICT,
2609        // Symbols start after tokens
2610        TOK_SYMBOL
2611    };
2612
2613    static const int LOCAL = 0x200;
2614
2615    static const int SYM_FORWARD = 0;
2616    static const int SYM_DEFINE = 1;
2617
2618    /* tokens in string heap */
2619    static const int TAG_TOK = ' ';
2620
2621    static const int OP_INCREMENT = 0;
2622    static const int OP_DECREMENT = 1;
2623    static const int OP_MUL = 2;
2624    static const int OP_DIV = 3;
2625    static const int OP_MOD = 4;
2626    static const int OP_PLUS = 5;
2627    static const int OP_MINUS = 6;
2628    static const int OP_SHIFT_LEFT = 7;
2629    static const int OP_SHIFT_RIGHT = 8;
2630    static const int OP_LESS_EQUAL = 9;
2631    static const int OP_GREATER_EQUAL = 10;
2632    static const int OP_LESS = 11;
2633    static const int OP_GREATER = 12;
2634    static const int OP_EQUALS = 13;
2635    static const int OP_NOT_EQUALS = 14;
2636    static const int OP_LOGICAL_AND = 15;
2637    static const int OP_LOGICAL_OR = 16;
2638    static const int OP_BIT_AND = 17;
2639    static const int OP_BIT_XOR = 18;
2640    static const int OP_BIT_OR = 19;
2641    static const int OP_BIT_NOT = 20;
2642    static const int OP_LOGICAL_NOT = 21;
2643    static const int OP_COUNT = 22;
2644
2645    /* Operators are searched from front, the two-character operators appear
2646     * before the single-character operators with the same first character.
2647     * @ is used to pad out single-character operators.
2648     */
2649    static const char* operatorChars;
2650    static const char operatorLevel[];
2651
2652    /* Called when we detect an internal problem. Does nothing in production.
2653     *
2654     */
2655    void internalError() {
2656        * (char*) 0 = 0;
2657    }
2658
2659    void assert(bool isTrue) {
2660        if (!isTrue) {
2661            internalError();
2662        }
2663    }
2664
2665    bool isSymbol(tokenid_t t) {
2666        return t >= TOK_SYMBOL &&
2667            ((size_t) (t-TOK_SYMBOL)) < mTokenTable.size();
2668    }
2669
2670    bool isSymbolOrKeyword(tokenid_t t) {
2671        return t >= TOK_KEYWORD &&
2672            ((size_t) (t-TOK_KEYWORD)) < mTokenTable.size();
2673    }
2674
2675    VariableInfo* VI(tokenid_t t) {
2676        assert(isSymbol(t));
2677        VariableInfo* pV = mTokenTable[t].mpVariableInfo;
2678        if (pV && pV->tok != t) {
2679            internalError();
2680        }
2681        return pV;
2682    }
2683
2684    inline bool isDefined(tokenid_t t) {
2685        return t >= TOK_SYMBOL && VI(t) != 0;
2686    }
2687
2688    const char* nameof(tokenid_t t) {
2689        assert(isSymbolOrKeyword(t));
2690        return mTokenTable[t].pText;
2691    }
2692
2693    void pdef(int t) {
2694        mTokenString.append(t);
2695    }
2696
2697    void inp() {
2698        if (dptr) {
2699            ch = *dptr++;
2700            if (ch == 0) {
2701                dptr = 0;
2702                ch = dch;
2703            }
2704        } else
2705            ch = file->getChar();
2706#if 0
2707        printf("ch='%c' 0x%x\n", ch, ch);
2708#endif
2709    }
2710
2711    int isid() {
2712        return isalnum(ch) | (ch == '_');
2713    }
2714
2715    /* read a character constant, advances ch to after end of constant */
2716    int getq() {
2717        int val = ch;
2718        if (ch == '\\') {
2719            inp();
2720            if (isoctal(ch)) {
2721                // 1 to 3 octal characters.
2722                val = 0;
2723                for(int i = 0; i < 3; i++) {
2724                    if (isoctal(ch)) {
2725                        val = (val << 3) + ch - '0';
2726                        inp();
2727                    }
2728                }
2729                return val;
2730            } else if (ch == 'x' || ch == 'X') {
2731                // N hex chars
2732                inp();
2733                if (! isxdigit(ch)) {
2734                    error("'x' character escape requires at least one digit.");
2735                } else {
2736                    val = 0;
2737                    while (isxdigit(ch)) {
2738                        int d = ch;
2739                        if (isdigit(d)) {
2740                            d -= '0';
2741                        } else if (d <= 'F') {
2742                            d = d - 'A' + 10;
2743                        } else {
2744                            d = d - 'a' + 10;
2745                        }
2746                        val = (val << 4) + d;
2747                        inp();
2748                    }
2749                }
2750            } else {
2751                int val = ch;
2752                switch (ch) {
2753                    case 'a':
2754                        val = '\a';
2755                        break;
2756                    case 'b':
2757                        val = '\b';
2758                        break;
2759                    case 'f':
2760                        val = '\f';
2761                        break;
2762                    case 'n':
2763                        val = '\n';
2764                        break;
2765                    case 'r':
2766                        val = '\r';
2767                        break;
2768                    case 't':
2769                        val = '\t';
2770                        break;
2771                    case 'v':
2772                        val = '\v';
2773                        break;
2774                    case '\\':
2775                        val = '\\';
2776                        break;
2777                    case '\'':
2778                        val = '\'';
2779                        break;
2780                    case '"':
2781                        val = '"';
2782                        break;
2783                    case '?':
2784                        val = '?';
2785                        break;
2786                    default:
2787                        error("Undefined character escape %c", ch);
2788                        break;
2789                }
2790                inp();
2791                return val;
2792            }
2793        } else {
2794            inp();
2795        }
2796        return val;
2797    }
2798
2799    static bool isoctal(int ch) {
2800        return ch >= '0' && ch <= '7';
2801    }
2802
2803    bool acceptCh(int c) {
2804        bool result = c == ch;
2805        if (result) {
2806            pdef(ch);
2807            inp();
2808        }
2809        return result;
2810    }
2811
2812    bool acceptDigitsCh() {
2813        bool result = false;
2814        while (isdigit(ch)) {
2815            result = true;
2816            pdef(ch);
2817            inp();
2818        }
2819        return result;
2820    }
2821
2822    void parseFloat() {
2823        tok = TOK_NUM_DOUBLE;
2824        // mTokenString already has the integral part of the number.
2825        acceptCh('.');
2826        acceptDigitsCh();
2827        bool doExp = true;
2828        if (acceptCh('e') || acceptCh('E')) {
2829            // Don't need to do any extra work
2830        } else if (ch == 'f' || ch == 'F') {
2831            pdef('e'); // So it can be parsed by strtof.
2832            inp();
2833            tok = TOK_NUM_FLOAT;
2834        } else {
2835            doExp = false;
2836        }
2837        if (doExp) {
2838            bool digitsRequired = acceptCh('-');
2839            bool digitsFound = acceptDigitsCh();
2840            if (digitsRequired && ! digitsFound) {
2841                error("malformed exponent");
2842            }
2843        }
2844        char* pText = mTokenString.getUnwrapped();
2845        if (tok == TOK_NUM_FLOAT) {
2846            tokd = strtof(pText, 0);
2847        } else {
2848            tokd = strtod(pText, 0);
2849        }
2850        //fprintf(stderr, "float constant: %s (%d) %g\n", pText, tok, tokd);
2851    }
2852
2853    void next() {
2854        int l, a;
2855
2856        while (isspace(ch) | (ch == '#')) {
2857            if (ch == '#') {
2858                inp();
2859                next();
2860                if (tok == TOK_DEFINE) {
2861                    doDefine();
2862                } else if (tok == TOK_PRAGMA) {
2863                    doPragma();
2864                } else {
2865                    error("Unsupported preprocessor directive \"%s\"",
2866                          mTokenString.getUnwrapped());
2867                }
2868            }
2869            inp();
2870        }
2871        tokl = 0;
2872        tok = ch;
2873        /* encode identifiers & numbers */
2874        if (isid()) {
2875            mTokenString.clear();
2876            while (isid()) {
2877                pdef(ch);
2878                inp();
2879            }
2880            if (isdigit(tok)) {
2881                // Start of a numeric constant. Could be integer, float, or
2882                // double, won't know until we look further.
2883                if (ch == '.' || ch == 'e' || ch == 'e'
2884                    || ch == 'f' || ch == 'F')  {
2885                    parseFloat();
2886                } else {
2887                    // It's an integer constant
2888                    tokc = strtol(mTokenString.getUnwrapped(), 0, 0);
2889                    tok = TOK_NUM;
2890                }
2891            } else {
2892                tok = mTokenTable.intern(mTokenString.getUnwrapped(),
2893                                         mTokenString.len());
2894                // Is this a macro?
2895                char* pMacroDefinition = mTokenTable[tok].mpMacroDefinition;
2896                if(pMacroDefinition) {
2897                    // Yes, it is a macro
2898                    dptr = pMacroDefinition;
2899                    dch = ch;
2900                    inp();
2901                    next();
2902                }
2903            }
2904        } else {
2905            inp();
2906            if (tok == '\'') {
2907                tok = TOK_NUM;
2908                tokc = getq();
2909                if (ch != '\'') {
2910                    error("Expected a ' character, got %c", ch);
2911                } else {
2912                  inp();
2913                }
2914            } else if ((tok == '/') & (ch == '*')) {
2915                inp();
2916                while (ch && ch != EOF) {
2917                    while (ch != '*' && ch != EOF)
2918                        inp();
2919                    inp();
2920                    if (ch == '/')
2921                        ch = 0;
2922                }
2923                if (ch == EOF) {
2924                    error("End of file inside comment.");
2925                }
2926                inp();
2927                next();
2928            } else if ((tok == '/') & (ch == '/')) {
2929                inp();
2930                while (ch && (ch != '\n') && (ch != EOF)) {
2931                    inp();
2932                }
2933                inp();
2934                next();
2935            } else {
2936                const char* t = operatorChars;
2937                int opIndex = 0;
2938                while ((l = *t++) != 0) {
2939                    a = *t++;
2940                    tokl = operatorLevel[opIndex];
2941                    tokc = opIndex;
2942                    if ((l == tok) & ((a == ch) | (a == '@'))) {
2943#if 0
2944                        printf("%c%c -> tokl=%d tokc=0x%x\n",
2945                                l, a, tokl, tokc);
2946#endif
2947                        if (a == ch) {
2948                            inp();
2949                            tok = TOK_DUMMY; /* dummy token for double tokens */
2950                        }
2951                        break;
2952                    }
2953                    opIndex++;
2954                }
2955                if (l == 0) {
2956                    tokl = 0;
2957                    tokc = 0;
2958                }
2959            }
2960        }
2961#if 0
2962        {
2963            String buf;
2964            decodeToken(buf, tok);
2965            fprintf(stderr, "%s\n", buf.getUnwrapped());
2966        }
2967#endif
2968    }
2969
2970    void doDefine() {
2971        next();
2972        tokenid_t name = tok;
2973        String* pName = new String();
2974        while (isspace(ch)) {
2975            inp();
2976        }
2977        if (ch == '(') {
2978            delete pName;
2979            error("Defines with arguments not supported");
2980            return;
2981        }
2982        while (isspace(ch)) {
2983            inp();
2984        }
2985        String value;
2986        while (ch != '\n' && ch != EOF) {
2987            value.append(ch);
2988            inp();
2989        }
2990        char* pDefn = (char*)mGlobalArena.alloc(value.len() + 1);
2991        memcpy(pDefn, value.getUnwrapped(), value.len());
2992        pDefn[value.len()] = 0;
2993        mTokenTable[name].mpMacroDefinition = pDefn;
2994    }
2995
2996    void doPragma() {
2997        // # pragma name(val)
2998        int state = 0;
2999        while(ch != EOF && ch != '\n' && state < 10) {
3000            switch(state) {
3001                case 0:
3002                    if (isspace(ch)) {
3003                        inp();
3004                    } else {
3005                        state++;
3006                    }
3007                    break;
3008                case 1:
3009                    if (isalnum(ch)) {
3010                        mPragmas.append(ch);
3011                        inp();
3012                    } else if (ch == '(') {
3013                        mPragmas.append(0);
3014                        inp();
3015                        state++;
3016                    } else {
3017                        state = 11;
3018                    }
3019                    break;
3020                case 2:
3021                    if (isalnum(ch)) {
3022                        mPragmas.append(ch);
3023                        inp();
3024                    } else if (ch == ')') {
3025                        mPragmas.append(0);
3026                        inp();
3027                        state = 10;
3028                    } else {
3029                        state = 11;
3030                    }
3031                    break;
3032            }
3033        }
3034        if(state != 10) {
3035            error("Unexpected pragma syntax");
3036        }
3037        mPragmaStringCount += 2;
3038    }
3039
3040    virtual void verror(const char* fmt, va_list ap) {
3041        mErrorBuf.printf("%ld: ", file->getLine());
3042        mErrorBuf.vprintf(fmt, ap);
3043        mErrorBuf.printf("\n");
3044    }
3045
3046    void skip(intptr_t c) {
3047        if (tok != c) {
3048            error("'%c' expected", c);
3049        }
3050        next();
3051    }
3052
3053    bool accept(intptr_t c) {
3054        if (tok == c) {
3055            next();
3056            return true;
3057        }
3058        return false;
3059    }
3060
3061    bool acceptStringLiteral() {
3062        if (tok == '"') {
3063            pGen->li((int) glo, mkpCharPtr);
3064            // This while loop merges multiple adjacent string constants.
3065            while (tok == '"') {
3066                while (ch != '"' && ch != EOF) {
3067                    *allocGlobalSpace(1,1) = getq();
3068                }
3069                if (ch != '"') {
3070                    error("Unterminated string constant.");
3071                }
3072                inp();
3073                next();
3074            }
3075            /* Null terminate */
3076            *glo = 0;
3077            /* align heap */
3078            allocGlobalSpace(1,(char*) (((intptr_t) glo + 4) & -4) - glo);
3079
3080            return true;
3081        }
3082        return false;
3083    }
3084    /* Parse and evaluate a unary expression.
3085     * allowAssignment is true if '=' parsing wanted (quick hack)
3086     */
3087    void unary(bool allowAssignment) {
3088        intptr_t n, t, a;
3089        t = 0;
3090        n = 1; /* type of expression 0 = forward, 1 = value, other = lvalue */
3091        if (acceptStringLiteral()) {
3092            // Nothing else to do.
3093        } else {
3094            int c = tokl;
3095            a = tokc;
3096            double ad = tokd;
3097            t = tok;
3098            next();
3099            if (t == TOK_NUM) {
3100                pGen->li(a, mkpInt);
3101            } else if (t == TOK_NUM_FLOAT) {
3102                // Align to 4-byte boundary
3103                glo = (char*) (((intptr_t) glo + 3) & -4);
3104                * (float*) glo = (float) ad;
3105                pGen->loadFloat((int) glo, mkpFloat);
3106                glo += 4;
3107            } else if (t == TOK_NUM_DOUBLE) {
3108                // Align to 8-byte boundary
3109                glo = (char*) (((intptr_t) glo + 7) & -8);
3110                * (double*) glo = ad;
3111                pGen->loadFloat((int) glo, mkpDouble);
3112                glo += 8;
3113            } else if (c == 2) {
3114                /* -, +, !, ~ */
3115                unary(false);
3116                if (t == '!')
3117                    pGen->gUnaryCmp(a, mkpInt);
3118                else if (t == '+') {
3119                    // ignore unary plus.
3120                } else {
3121                    pGen->genUnaryOp(a);
3122                }
3123            } else if (t == '(') {
3124                expr();
3125                skip(')');
3126            } else if (t == '*') {
3127                /* This is a pointer dereference, but we currently only
3128                 * support a pointer dereference if it's immediately
3129                 * in front of a cast. So parse the cast right here.
3130                 */
3131                skip('(');
3132                Type* pCast = expectCastTypeDeclaration(mLocalArena);
3133                // We currently only handle 3 types of cast:
3134                // (int*), (char*) , (int (*)())
3135                if(typeEqual(pCast, mkpIntPtr)) {
3136                    t = TOK_INT;
3137                } else if (typeEqual(pCast, mkpCharPtr)) {
3138                    t = TOK_CHAR;
3139                } else if (typeEqual(pCast, mkpFloatPtr)) {
3140                    t = TOK_FLOAT;
3141                } else if (typeEqual(pCast, mkpDoublePtr)) {
3142                    t = TOK_DOUBLE;
3143                } else if (typeEqual(pCast, mkpPtrIntFn)){
3144                    t = 0;
3145                } else {
3146                    String buffer;
3147                    decodeType(buffer, pCast);
3148                    error("Unsupported cast type %s", buffer.getUnwrapped());
3149                    decodeType(buffer, mkpPtrIntFn);
3150                }
3151                skip(')');
3152                unary(false);
3153                if (accept('=')) {
3154                    pGen->pushR0();
3155                    expr();
3156                    pGen->storeR0ToTOS(pCast);
3157                } else if (t) {
3158                    pGen->loadR0FromR0(pCast);
3159                }
3160                // Else we fall through to the function call below, with
3161                // t == 0 to trigger an indirect function call. Hack!
3162            } else if (t == '&') {
3163                VariableInfo* pVI = VI(tok);
3164                pGen->leaR0((int) pVI->pAddress,
3165                            createPtrType(pVI->pType, mLocalArena));
3166                next();
3167            } else if (t == EOF ) {
3168                error("Unexpected EOF.");
3169            } else if (!checkSymbol(t)) {
3170                // Don't have to do anything special here, the error
3171                // message was printed by checkSymbol() above.
3172            } else {
3173                if (!isDefined(t)) {
3174                    mGlobals.add(t);
3175                    // printf("Adding new global function %s\n", nameof(t));
3176                }
3177                VariableInfo* pVI = VI(t);
3178                n = (intptr_t) pVI->pAddress;
3179                /* forward reference: try dlsym */
3180                if (!n) {
3181                    n = (intptr_t) dlsym(RTLD_DEFAULT, nameof(t));
3182                    if (tok == '(') {
3183                        pVI->pType = mkpIntFn;
3184                    } else {
3185                        pVI->pType = mkpInt;
3186                    }
3187                    pVI->pAddress = (void*) n;
3188                }
3189                if ((tok == '=') & allowAssignment) {
3190                    /* assignment */
3191                    next();
3192                    expr();
3193                    pGen->storeR0(n, pVI->pType);
3194                } else if (tok != '(') {
3195                    /* variable */
3196                    if (!n) {
3197                        error("Undefined variable %s", nameof(t));
3198                    }
3199                    pGen->loadR0(n, tokl == 11, tokc, pVI->pType);
3200                    if (tokl == 11) {
3201                        next();
3202                    }
3203                }
3204            }
3205        }
3206
3207        /* function call */
3208        if (accept('(')) {
3209            Type* pArgList = NULL;
3210            VariableInfo* pVI = NULL;
3211            if (n == 1) { // Indirect function call, push address of fn.
3212                pArgList = pGen->getR0Type()->pTail;
3213                pGen->pushR0();
3214            } else {
3215                pVI = VI(t);
3216                pArgList = pVI->pType->pTail;
3217            }
3218            bool varArgs = pArgList == NULL;
3219            /* push args and invert order */
3220            a = pGen->beginFunctionCallArguments();
3221            int l = 0;
3222            int argCount = 0;
3223            while (tok != ')' && tok != EOF) {
3224                if (! varArgs && !pArgList) {
3225                    error ("Unexpected argument.");
3226                }
3227                expr();
3228                Type* pTargetType;
3229                if (pArgList) {
3230                    pTargetType = pArgList->pHead;
3231                    pArgList = pArgList->pTail;
3232                } else {
3233                    pTargetType = pGen->getR0Type();
3234                    if (pTargetType->tag == TY_FLOAT) {
3235                        pTargetType = mkpDouble;
3236                    }
3237                }
3238                if (pTargetType->tag == TY_VOID) {
3239                    error("Can't pass void value for argument %d",
3240                          argCount + 1);
3241                } else {
3242                    pGen->convertR0(pTargetType);
3243                    l += pGen->storeR0ToArg(l);
3244                }
3245                if (accept(',')) {
3246                    // fine
3247                } else if ( tok != ')') {
3248                    error("Expected ',' or ')'");
3249                }
3250                argCount += 1;
3251            }
3252            if (! varArgs && pArgList) {
3253                error ("Expected more argument(s). Saw %d", argCount);
3254            }
3255            pGen->endFunctionCallArguments(a, l);
3256            skip(')');
3257            if (!n) {
3258                /* forward reference */
3259                pVI->pForward = (void*) pGen->callForward((int) pVI->pForward,
3260                                                          pVI->pType);
3261            } else if (n == 1) {
3262                pGen->callIndirect(l, mkpPtrIntFn->pHead);
3263            } else {
3264                pGen->callRelative(n - codeBuf.getPC() - pGen->jumpOffset(),
3265                                   VI(t)->pType);
3266            }
3267            pGen->adjustStackAfterCall(l, n == 1);
3268        }
3269    }
3270
3271    /* Recursive descent parser for binary operations.
3272     */
3273    void binaryOp(int level) {
3274        intptr_t t, n, a;
3275        t = 0;
3276        if (level-- == 1)
3277            unary(true);
3278        else {
3279            binaryOp(level);
3280            a = 0;
3281            while (level == tokl) {
3282                n = tok;
3283                t = tokc;
3284                next();
3285
3286                if (level > 8) {
3287                    a = pGen->gtst(t == OP_LOGICAL_OR, a); /* && and || output code generation */
3288                    binaryOp(level);
3289                } else {
3290                    pGen->pushR0();
3291                    binaryOp(level);
3292
3293                    if ((level == 4) | (level == 5)) {
3294                        pGen->gcmp(t, mkpInt);
3295                    } else {
3296                        pGen->genOp(t);
3297                    }
3298                }
3299            }
3300            /* && and || output code generation */
3301            if (a && level > 8) {
3302                a = pGen->gtst(t == OP_LOGICAL_OR, a);
3303                pGen->li(t != OP_LOGICAL_OR, mkpInt);
3304                pGen->gjmp(5); /* jmp $ + 5 (sizeof li, FIXME for ARM) */
3305                pGen->gsym(a);
3306                pGen->li(t == OP_LOGICAL_OR, mkpInt);
3307            }
3308        }
3309    }
3310
3311    void expr() {
3312        binaryOp(11);
3313    }
3314
3315    int test_expr() {
3316        expr();
3317        return pGen->gtst(0, 0);
3318    }
3319
3320    void block(intptr_t l, bool outermostFunctionBlock) {
3321        intptr_t a, n, t;
3322
3323        Type* pBaseType;
3324        if ((pBaseType = acceptPrimitiveType(mLocalArena))) {
3325            /* declarations */
3326            localDeclarations(pBaseType);
3327        } else if (tok == TOK_IF) {
3328            next();
3329            skip('(');
3330            a = test_expr();
3331            skip(')');
3332            block(l, false);
3333            if (tok == TOK_ELSE) {
3334                next();
3335                n = pGen->gjmp(0); /* jmp */
3336                pGen->gsym(a);
3337                block(l, false);
3338                pGen->gsym(n); /* patch else jmp */
3339            } else {
3340                pGen->gsym(a); /* patch if test */
3341            }
3342        } else if ((tok == TOK_WHILE) | (tok == TOK_FOR)) {
3343            t = tok;
3344            next();
3345            skip('(');
3346            if (t == TOK_WHILE) {
3347                n = codeBuf.getPC(); // top of loop, target of "next" iteration
3348                a = test_expr();
3349            } else {
3350                if (tok != ';')
3351                    expr();
3352                skip(';');
3353                n = codeBuf.getPC();
3354                a = 0;
3355                if (tok != ';')
3356                    a = test_expr();
3357                skip(';');
3358                if (tok != ')') {
3359                    t = pGen->gjmp(0);
3360                    expr();
3361                    pGen->gjmp(n - codeBuf.getPC() - pGen->jumpOffset());
3362                    pGen->gsym(t);
3363                    n = t + 4;
3364                }
3365            }
3366            skip(')');
3367            block((intptr_t) &a, false);
3368            pGen->gjmp(n - codeBuf.getPC() - pGen->jumpOffset()); /* jmp */
3369            pGen->gsym(a);
3370        } else if (tok == '{') {
3371            if (! outermostFunctionBlock) {
3372                mLocals.pushLevel();
3373            }
3374            next();
3375            while (tok != '}' && tok != EOF)
3376                block(l, false);
3377            skip('}');
3378            if (! outermostFunctionBlock) {
3379                mLocals.popLevel();
3380            }
3381        } else {
3382            if (accept(TOK_RETURN)) {
3383                if (tok != ';') {
3384                    expr();
3385                    if (pReturnType->tag == TY_VOID) {
3386                        error("Must not return a value from a void function");
3387                    } else {
3388                        pGen->convertR0(pReturnType);
3389                    }
3390                } else {
3391                    if (pReturnType->tag != TY_VOID) {
3392                        error("Must specify a value here");
3393                    }
3394                }
3395                rsym = pGen->gjmp(rsym); /* jmp */
3396            } else if (accept(TOK_BREAK)) {
3397                *(int *) l = pGen->gjmp(*(int *) l);
3398            } else if (tok != ';')
3399                expr();
3400            skip(';');
3401        }
3402    }
3403
3404    bool typeEqual(Type* a, Type* b) {
3405        if (a == b) {
3406            return true;
3407        }
3408        if (a == NULL || b == NULL) {
3409            return false;
3410        }
3411        TypeTag at = a->tag;
3412        if (at != b->tag) {
3413            return false;
3414        }
3415        if (at == TY_POINTER) {
3416            return typeEqual(a->pHead, b->pHead);
3417        } else if (at == TY_FUNC || at == TY_PARAM) {
3418            return typeEqual(a->pHead, b->pHead)
3419                && typeEqual(a->pTail, b->pTail);
3420        }
3421        return true;
3422    }
3423
3424    Type* createType(TypeTag tag, Type* pHead, Type* pTail, Arena& arena) {
3425        assert(tag >= TY_INT && tag <= TY_PARAM);
3426        Type* pType = (Type*) arena.alloc(sizeof(Type));
3427        memset(pType, 0, sizeof(*pType));
3428        pType->tag = tag;
3429        pType->pHead = pHead;
3430        pType->pTail = pTail;
3431        return pType;
3432    }
3433
3434    Type* createPtrType(Type* pType, Arena& arena) {
3435        return createType(TY_POINTER, pType, NULL, arena);
3436    }
3437
3438    /**
3439     * Try to print a type in declaration order
3440     */
3441    void decodeType(String& buffer, Type* pType) {
3442        buffer.clear();
3443        if (pType == NULL) {
3444            buffer.appendCStr("null");
3445            return;
3446        }
3447        decodeTypeImp(buffer, pType);
3448    }
3449
3450    void decodeTypeImp(String& buffer, Type* pType) {
3451        decodeTypeImpPrefix(buffer, pType);
3452
3453        String temp;
3454        if (pType->id != 0) {
3455            decodeToken(temp, pType->id);
3456            buffer.append(temp);
3457        }
3458
3459        decodeTypeImpPostfix(buffer, pType);
3460    }
3461
3462    void decodeTypeImpPrefix(String& buffer, Type* pType) {
3463        TypeTag tag = pType->tag;
3464
3465        if (tag >= TY_INT && tag <= TY_VOID) {
3466            switch (tag) {
3467                case TY_INT:
3468                    buffer.appendCStr("int");
3469                    break;
3470                case TY_CHAR:
3471                    buffer.appendCStr("char");
3472                    break;
3473                case TY_VOID:
3474                    buffer.appendCStr("void");
3475                    break;
3476                case TY_FLOAT:
3477                    buffer.appendCStr("float");
3478                    break;
3479                case TY_DOUBLE:
3480                    buffer.appendCStr("double");
3481                    break;
3482                default:
3483                    break;
3484            }
3485            buffer.append(' ');
3486        }
3487
3488        switch (tag) {
3489            case TY_INT:
3490                break;
3491            case TY_CHAR:
3492                break;
3493            case TY_VOID:
3494                 break;
3495            case TY_FLOAT:
3496                 break;
3497            case TY_DOUBLE:
3498                break;
3499            case TY_POINTER:
3500                decodeTypeImpPrefix(buffer, pType->pHead);
3501                if(pType->pHead && pType->pHead->tag == TY_FUNC) {
3502                    buffer.append('(');
3503                }
3504                buffer.append('*');
3505                break;
3506            case TY_FUNC:
3507                decodeTypeImp(buffer, pType->pHead);
3508                break;
3509            case TY_PARAM:
3510                decodeTypeImp(buffer, pType->pHead);
3511                break;
3512            default:
3513                String temp;
3514                temp.printf("Unknown tag %d", pType->tag);
3515                buffer.append(temp);
3516                break;
3517        }
3518    }
3519
3520    void decodeTypeImpPostfix(String& buffer, Type* pType) {
3521        TypeTag tag = pType->tag;
3522
3523        switch(tag) {
3524            case TY_POINTER:
3525                if(pType->pHead && pType->pHead->tag == TY_FUNC) {
3526                    buffer.append(')');
3527                }
3528                decodeTypeImpPostfix(buffer, pType->pHead);
3529                break;
3530            case TY_FUNC:
3531                buffer.append('(');
3532                for(Type* pArg = pType->pTail; pArg; pArg = pArg->pTail) {
3533                    decodeTypeImp(buffer, pArg);
3534                    if (pArg->pTail) {
3535                        buffer.appendCStr(", ");
3536                    }
3537                }
3538                buffer.append(')');
3539                break;
3540            default:
3541                break;
3542        }
3543    }
3544
3545    void printType(Type* pType) {
3546        String buffer;
3547        decodeType(buffer, pType);
3548        fprintf(stderr, "%s\n", buffer.getUnwrapped());
3549    }
3550
3551    Type* acceptPrimitiveType(Arena& arena) {
3552        Type* pType;
3553        if (tok == TOK_INT) {
3554            pType = mkpInt;
3555        } else if (tok == TOK_CHAR) {
3556            pType = mkpChar;
3557        } else if (tok == TOK_VOID) {
3558            pType = mkpVoid;
3559        } else if (tok == TOK_FLOAT) {
3560            pType = mkpFloat;
3561        } else if (tok == TOK_DOUBLE) {
3562            pType = mkpDouble;
3563        } else {
3564            return NULL;
3565        }
3566        next();
3567        return pType;
3568    }
3569
3570    Type* acceptDeclaration(Type* pType, bool nameAllowed, bool nameRequired,
3571                            Arena& arena) {
3572        tokenid_t declName = 0;
3573        pType = acceptDecl2(pType, declName, nameAllowed,
3574                                  nameRequired, arena);
3575        if (declName) {
3576            // Clone the parent type so we can set a unique ID
3577            pType = createType(pType->tag, pType->pHead,
3578                                      pType->pTail, arena);
3579
3580            pType->id = declName;
3581        }
3582        // fprintf(stderr, "Parsed a declaration:       ");
3583        // printType(pType);
3584        return pType;
3585    }
3586
3587    Type* expectDeclaration(Type* pBaseType, Arena& arena) {
3588        Type* pType = acceptDeclaration(pBaseType, true, true, arena);
3589        if (! pType) {
3590            error("Expected a declaration");
3591        }
3592        return pType;
3593    }
3594
3595    /* Used for accepting types that appear in casts */
3596    Type* acceptCastTypeDeclaration(Arena& arena) {
3597        Type* pType = acceptPrimitiveType(arena);
3598        if (pType) {
3599            pType = acceptDeclaration(pType, false, false, arena);
3600        }
3601        return pType;
3602    }
3603
3604    Type* expectCastTypeDeclaration(Arena& arena) {
3605        Type* pType = acceptCastTypeDeclaration(arena);
3606        if (! pType) {
3607            error("Expected a declaration");
3608        }
3609        return pType;
3610    }
3611
3612    Type* acceptDecl2(Type* pType, tokenid_t& declName,
3613                      bool nameAllowed, bool nameRequired, Arena& arena) {
3614        int ptrCounter = 0;
3615        while (accept('*')) {
3616            ptrCounter++;
3617        }
3618        pType = acceptDecl3(pType, declName, nameAllowed, nameRequired, arena);
3619        while (ptrCounter-- > 0) {
3620            pType = createType(TY_POINTER, pType, NULL, arena);
3621        }
3622        return pType;
3623    }
3624
3625    Type* acceptDecl3(Type* pType, tokenid_t& declName,
3626                      bool nameAllowed, bool nameRequired, Arena& arena) {
3627        // direct-dcl :
3628        //   name
3629        //  (dcl)
3630        //   direct-dcl()
3631        //   direct-dcl[]
3632        Type* pNewHead = NULL;
3633        if (accept('(')) {
3634            pNewHead = acceptDecl2(pNewHead, declName, nameAllowed,
3635                                nameRequired, arena);
3636            skip(')');
3637        } else if ((declName = acceptSymbol()) != 0) {
3638            if (nameAllowed == false && declName) {
3639                error("Symbol %s not allowed here", nameof(declName));
3640            } else if (nameRequired && ! declName) {
3641                String temp;
3642                decodeToken(temp, tok);
3643                error("Expected symbol. Got %s", temp.getUnwrapped());
3644            }
3645        }
3646        while (accept('(')) {
3647            // Function declaration
3648            Type* pTail = acceptArgs(nameAllowed, arena);
3649            pType = createType(TY_FUNC, pType, pTail, arena);
3650            skip(')');
3651        }
3652
3653        if (pNewHead) {
3654            Type* pA = pNewHead;
3655            while (pA->pHead) {
3656                pA = pA->pHead;
3657            }
3658            pA->pHead = pType;
3659            pType = pNewHead;
3660        }
3661        return pType;
3662    }
3663
3664    Type* acceptArgs(bool nameAllowed, Arena& arena) {
3665        Type* pHead = NULL;
3666        Type* pTail = NULL;
3667        for(;;) {
3668            Type* pBaseArg = acceptPrimitiveType(arena);
3669            if (pBaseArg) {
3670                Type* pArg = acceptDeclaration(pBaseArg, nameAllowed, false,
3671                                               arena);
3672                if (pArg) {
3673                    Type* pParam = createType(TY_PARAM, pArg, NULL, arena);
3674                    if (!pHead) {
3675                        pHead = pParam;
3676                        pTail = pParam;
3677                    } else {
3678                        pTail->pTail = pParam;
3679                        pTail = pParam;
3680                    }
3681                }
3682            }
3683            if (! accept(',')) {
3684                break;
3685            }
3686        }
3687        return pHead;
3688    }
3689
3690    Type* expectPrimitiveType(Arena& arena) {
3691        Type* pType = acceptPrimitiveType(arena);
3692        if (!pType) {
3693            String buf;
3694            decodeToken(buf, tok);
3695            error("Expected a type, got %s", buf.getUnwrapped());
3696        }
3697        return pType;
3698    }
3699
3700    void addGlobalSymbol(Type* pDecl) {
3701        tokenid_t t = pDecl->id;
3702        VariableInfo* pVI = VI(t);
3703        if(pVI && pVI->pAddress) {
3704            reportDuplicate(t);
3705        }
3706        mGlobals.add(pDecl);
3707    }
3708
3709    void reportDuplicate(tokenid_t t) {
3710        error("Duplicate definition of %s", nameof(t));
3711    }
3712
3713    void addLocalSymbol(Type* pDecl) {
3714        tokenid_t t = pDecl->id;
3715        if (mLocals.isDefinedAtCurrentLevel(t)) {
3716            reportDuplicate(t);
3717        }
3718        mLocals.add(pDecl);
3719    }
3720
3721    void localDeclarations(Type* pBaseType) {
3722        intptr_t a;
3723
3724        while (pBaseType) {
3725            while (tok != ';' && tok != EOF) {
3726                Type* pDecl = expectDeclaration(pBaseType, mLocalArena);
3727                if (!pDecl) {
3728                    break;
3729                }
3730                int variableAddress = 0;
3731                addLocalSymbol(pDecl);
3732                loc = loc + pGen->sizeOf(pDecl);
3733                loc = loc + 4;
3734                variableAddress = -loc;
3735                VI(pDecl->id)->pAddress = (void*) variableAddress;
3736                if (accept('=')) {
3737                    /* assignment */
3738                    expr();
3739                    pGen->storeR0(variableAddress, pDecl);
3740                }
3741                if (tok == ',')
3742                    next();
3743            }
3744            skip(';');
3745            pBaseType = acceptPrimitiveType(mLocalArena);
3746        }
3747    }
3748
3749    bool checkSymbol() {
3750        return checkSymbol(tok);
3751    }
3752
3753    void decodeToken(String& buffer, tokenid_t token) {
3754        if (token == EOF ) {
3755            buffer.printf("EOF");
3756        } else if (token == TOK_NUM) {
3757            buffer.printf("numeric constant");
3758        } else if (token >= 0 && token < 256) {
3759            if (token < 32) {
3760                buffer.printf("'\\x%02x'", token);
3761            } else {
3762                buffer.printf("'%c'", token);
3763            }
3764        } else if (token >= TOK_KEYWORD && token < TOK_SYMBOL) {
3765            buffer.printf("keyword \"%s\"", nameof(token));
3766        } else {
3767            buffer.printf("symbol \"%s\"", nameof(token));
3768        }
3769    }
3770
3771    bool checkSymbol(tokenid_t token) {
3772        bool result = token >= TOK_SYMBOL;
3773        if (!result) {
3774            String temp;
3775            decodeToken(temp, token);
3776            error("Expected symbol. Got %s", temp.getUnwrapped());
3777        }
3778        return result;
3779    }
3780
3781    tokenid_t acceptSymbol() {
3782        tokenid_t result = 0;
3783        if (tok >= TOK_SYMBOL) {
3784            result = tok;
3785            next();
3786        }
3787        return result;
3788    }
3789
3790    void globalDeclarations() {
3791        while (tok != EOF) {
3792            Type* pBaseType = expectPrimitiveType(mGlobalArena);
3793            if (!pBaseType) {
3794                break;
3795            }
3796            Type* pDecl = expectDeclaration(pBaseType, mGlobalArena);
3797            if (!pDecl) {
3798                break;
3799            }
3800            if (! isDefined(pDecl->id)) {
3801                addGlobalSymbol(pDecl);
3802            }
3803            VariableInfo* name = VI(pDecl->id);
3804            if (name && name->pAddress) {
3805                error("Already defined global %s", nameof(pDecl->id));
3806            }
3807            if (pDecl->tag < TY_FUNC) {
3808                // it's a variable declaration
3809                for(;;) {
3810                    if (name && !name->pAddress) {
3811                        name->pAddress = (int*) allocGlobalSpace(
3812                                                   pGen->alignment(name->pType),
3813                                                   pGen->sizeOf(name->pType));
3814                    }
3815                    if (accept('=')) {
3816                        if (tok == TOK_NUM) {
3817                            if (name) {
3818                                * (int*) name->pAddress = tokc;
3819                            }
3820                            next();
3821                        } else {
3822                            error("Expected an integer constant");
3823                        }
3824                    }
3825                    if (!accept(',')) {
3826                        break;
3827                    }
3828                    pDecl = expectDeclaration(pBaseType, mGlobalArena);
3829                    if (!pDecl) {
3830                        break;
3831                    }
3832                    if (! isDefined(pDecl->id)) {
3833                        addGlobalSymbol(pDecl);
3834                    }
3835                    name = VI(pDecl->id);
3836                }
3837                skip(';');
3838            } else {
3839                // Function declaration
3840                if (accept(';')) {
3841                    // forward declaration.
3842                } else {
3843                    if (name) {
3844                        /* patch forward references (XXX: does not work for function
3845                         pointers) */
3846                        pGen->gsym((int) name->pForward);
3847                        /* put function address */
3848                        name->pAddress = (void*) codeBuf.getPC();
3849                    }
3850                    // Calculate stack offsets for parameters
3851                    mLocals.pushLevel();
3852                    intptr_t a = 8;
3853                    int argCount = 0;
3854                    for (Type* pP = pDecl->pTail; pP; pP = pP->pTail) {
3855                        Type* pArg = pP->pHead;
3856                        addLocalSymbol(pArg);
3857                        /* read param name and compute offset */
3858                        VI(pArg->id)->pAddress = (void*) a;
3859                        a = a + pGen->stackSizeOf(pArg);
3860                        argCount++;
3861                    }
3862                    rsym = loc = 0;
3863                    pReturnType = pDecl->pHead;
3864                    a = pGen->functionEntry(argCount);
3865                    block(0, true);
3866                    pGen->gsym(rsym);
3867                    pGen->functionExit(argCount, a, loc);
3868                    mLocals.popLevel();
3869                }
3870            }
3871        }
3872    }
3873
3874    char* allocGlobalSpace(size_t alignment, size_t bytes) {
3875        size_t base = (((size_t) glo) + alignment - 1) & ~(alignment-1);
3876        size_t end = base + bytes;
3877        if ((end - (size_t) pGlobalBase) > (size_t) ALLOC_SIZE) {
3878            error("Global space exhausted");
3879            return NULL;
3880        }
3881        char* result = (char*) base;
3882        glo = (char*) end;
3883        return result;
3884    }
3885
3886    void cleanup() {
3887        if (pGlobalBase != 0) {
3888            free(pGlobalBase);
3889            pGlobalBase = 0;
3890        }
3891        if (pGen) {
3892            delete pGen;
3893            pGen = 0;
3894        }
3895        if (file) {
3896            delete file;
3897            file = 0;
3898        }
3899    }
3900
3901    void clear() {
3902        tok = 0;
3903        tokc = 0;
3904        tokl = 0;
3905        ch = 0;
3906        rsym = 0;
3907        loc = 0;
3908        glo = 0;
3909        dptr = 0;
3910        dch = 0;
3911        file = 0;
3912        pGlobalBase = 0;
3913        pGen = 0;
3914        mPragmaStringCount = 0;
3915    }
3916
3917    void setArchitecture(const char* architecture) {
3918        delete pGen;
3919        pGen = 0;
3920
3921        if (architecture != NULL) {
3922#ifdef PROVIDE_ARM_CODEGEN
3923            if (! pGen && strcmp(architecture, "arm") == 0) {
3924                pGen = new ARMCodeGenerator();
3925            }
3926#endif
3927#ifdef PROVIDE_X86_CODEGEN
3928            if (! pGen && strcmp(architecture, "x86") == 0) {
3929                pGen = new X86CodeGenerator();
3930            }
3931#endif
3932            if (!pGen ) {
3933                error("Unknown architecture %s\n", architecture);
3934            }
3935        }
3936
3937        if (pGen == NULL) {
3938#if defined(DEFAULT_ARM_CODEGEN)
3939            pGen = new ARMCodeGenerator();
3940#elif defined(DEFAULT_X86_CODEGEN)
3941            pGen = new X86CodeGenerator();
3942#endif
3943        }
3944        if (pGen == NULL) {
3945            error("No code generator defined.");
3946        } else {
3947            pGen->setErrorSink(this);
3948        }
3949    }
3950
3951public:
3952    struct args {
3953        args() {
3954            architecture = 0;
3955        }
3956        const char* architecture;
3957    };
3958
3959    Compiler() {
3960        clear();
3961    }
3962
3963    ~Compiler() {
3964        cleanup();
3965    }
3966
3967    int compile(const char* text, size_t textLength) {
3968        int result;
3969
3970        cleanup();
3971        clear();
3972        mTokenTable.setArena(&mGlobalArena);
3973        mGlobals.setArena(&mGlobalArena);
3974        mGlobals.setTokenTable(&mTokenTable);
3975        mLocals.setArena(&mLocalArena);
3976        mLocals.setTokenTable(&mTokenTable);
3977
3978        internKeywords();
3979        createPrimitiveTypes();
3980        codeBuf.init(ALLOC_SIZE);
3981        setArchitecture(NULL);
3982        if (!pGen) {
3983            return -1;
3984        }
3985#ifdef PROVIDE_TRACE_CODEGEN
3986            pGen = new TraceCodeGenerator(pGen);
3987#endif
3988            pGen->setErrorSink(this);
3989        pGen->init(&codeBuf);
3990        file = new TextInputStream(text, textLength);
3991        pGlobalBase = (char*) calloc(1, ALLOC_SIZE);
3992        glo = pGlobalBase;
3993        inp();
3994        next();
3995        globalDeclarations();
3996        checkForUndefinedForwardReferences();
3997        result = pGen->finishCompile();
3998        if (result == 0) {
3999            if (mErrorBuf.len()) {
4000                result = -2;
4001            }
4002        }
4003        return result;
4004    }
4005
4006    void createPrimitiveTypes() {
4007        mkpInt = createType(TY_INT, NULL, NULL, mGlobalArena);
4008        mkpChar = createType(TY_CHAR, NULL, NULL, mGlobalArena);
4009        mkpVoid = createType(TY_VOID, NULL, NULL, mGlobalArena);
4010        mkpFloat = createType(TY_FLOAT, NULL, NULL, mGlobalArena);
4011        mkpDouble = createType(TY_DOUBLE, NULL, NULL, mGlobalArena);
4012        mkpIntFn =  createType(TY_FUNC, mkpInt, NULL, mGlobalArena);
4013        mkpIntPtr = createPtrType(mkpInt, mGlobalArena);
4014        mkpCharPtr = createPtrType(mkpChar, mGlobalArena);
4015        mkpFloatPtr = createPtrType(mkpFloat, mGlobalArena);
4016        mkpDoublePtr = createPtrType(mkpDouble, mGlobalArena);
4017        mkpPtrIntFn = createPtrType(mkpIntFn, mGlobalArena);
4018    }
4019
4020    void checkForUndefinedForwardReferences() {
4021        mGlobals.forEach(static_ufrcFn, this);
4022    }
4023
4024    static bool static_ufrcFn(VariableInfo* value, void* context) {
4025        Compiler* pCompiler = (Compiler*) context;
4026        return pCompiler->undefinedForwardReferenceCheck(value);
4027    }
4028
4029    bool undefinedForwardReferenceCheck(VariableInfo* value) {
4030        if (!value->pAddress && value->pForward) {
4031            error("Undefined forward reference: %s",
4032                  mTokenTable[value->tok].pText);
4033        }
4034        return true;
4035    }
4036
4037    int dump(FILE* out) {
4038        fwrite(codeBuf.getBase(), 1, codeBuf.getSize(), out);
4039        return 0;
4040    }
4041
4042    int disassemble(FILE* out) {
4043        return pGen->disassemble(out);
4044    }
4045
4046    /* Look through the symbol table to find a symbol.
4047     * If found, return its value.
4048     */
4049    void* lookup(const char* name) {
4050        tokenid_t tok = mTokenTable.intern(name, strlen(name));
4051        VariableInfo* pVariableInfo = VI(tok);
4052        if (pVariableInfo) {
4053            return pVariableInfo->pAddress;
4054        }
4055        return NULL;
4056    }
4057
4058    void getPragmas(ACCsizei* actualStringCount,
4059                    ACCsizei maxStringCount, ACCchar** strings) {
4060        int stringCount = mPragmaStringCount;
4061        if (actualStringCount) {
4062            *actualStringCount = stringCount;
4063        }
4064        if (stringCount > maxStringCount) {
4065            stringCount = maxStringCount;
4066        }
4067        if (strings) {
4068            char* pPragmas = mPragmas.getUnwrapped();
4069            while (stringCount-- > 0) {
4070                *strings++ = pPragmas;
4071                pPragmas += strlen(pPragmas) + 1;
4072            }
4073        }
4074    }
4075
4076    char* getErrorMessage() {
4077        return mErrorBuf.getUnwrapped();
4078    }
4079
4080};
4081
4082const char* Compiler::operatorChars =
4083    "++--*@/@%@+@-@<<>><=>=<@>@==!=&&||&@^@|@~@!@";
4084
4085const char Compiler::operatorLevel[] =
4086    {11, 11, 1, 1, 1, 2, 2, 3, 3, 4, 4, 4, 4,
4087            5, 5, /* ==, != */
4088            9, 10, /* &&, || */
4089            6, 7, 8, /* & ^ | */
4090            2, 2 /* ~ ! */
4091            };
4092
4093#ifdef PROVIDE_ARM_CODEGEN
4094FILE* Compiler::ARMCodeGenerator::disasmOut;
4095#endif
4096
4097#ifdef PROVIDE_X86_CODEGEN
4098const int Compiler::X86CodeGenerator::operatorHelper[] = {
4099        0x1,     // ++
4100        0xff,    // --
4101        0xc1af0f, // *
4102        0xf9f79991, // /
4103        0xf9f79991, // % (With manual assist to swap results)
4104        0xc801, // +
4105        0xd8f7c829, // -
4106        0xe0d391, // <<
4107        0xf8d391, // >>
4108        0xe, // <=
4109        0xd, // >=
4110        0xc, // <
4111        0xf, // >
4112        0x4, // ==
4113        0x5, // !=
4114        0x0, // &&
4115        0x1, // ||
4116        0xc821, // &
4117        0xc831, // ^
4118        0xc809, // |
4119        0xd0f7, // ~
4120        0x4     // !
4121};
4122#endif
4123
4124struct ACCscript {
4125    ACCscript() {
4126        text = 0;
4127        textLength = 0;
4128        accError = ACC_NO_ERROR;
4129    }
4130
4131    ~ACCscript() {
4132        delete text;
4133    }
4134
4135    void setError(ACCenum error) {
4136        if (accError == ACC_NO_ERROR && error != ACC_NO_ERROR) {
4137            accError = error;
4138        }
4139    }
4140
4141    ACCenum getError() {
4142        ACCenum result = accError;
4143        accError = ACC_NO_ERROR;
4144        return result;
4145    }
4146
4147    Compiler compiler;
4148    char* text;
4149    int textLength;
4150    ACCenum accError;
4151};
4152
4153
4154extern "C"
4155ACCscript* accCreateScript() {
4156    return new ACCscript();
4157}
4158
4159extern "C"
4160ACCenum accGetError( ACCscript* script ) {
4161    return script->getError();
4162}
4163
4164extern "C"
4165void accDeleteScript(ACCscript* script) {
4166    delete script;
4167}
4168
4169extern "C"
4170void accScriptSource(ACCscript* script,
4171    ACCsizei count,
4172    const ACCchar ** string,
4173    const ACCint * length) {
4174    int totalLength = 0;
4175    for(int i = 0; i < count; i++) {
4176        int len = -1;
4177        const ACCchar* s = string[i];
4178        if (length) {
4179            len = length[i];
4180        }
4181        if (len < 0) {
4182            len = strlen(s);
4183        }
4184        totalLength += len;
4185    }
4186    delete script->text;
4187    char* text = new char[totalLength + 1];
4188    script->text = text;
4189    script->textLength = totalLength;
4190    char* dest = text;
4191    for(int i = 0; i < count; i++) {
4192        int len = -1;
4193        const ACCchar* s = string[i];
4194        if (length) {
4195            len = length[i];
4196        }
4197        if (len < 0) {
4198            len = strlen(s);
4199        }
4200        memcpy(dest, s, len);
4201        dest += len;
4202    }
4203    text[totalLength] = '\0';
4204}
4205
4206extern "C"
4207void accCompileScript(ACCscript* script) {
4208    int result = script->compiler.compile(script->text, script->textLength);
4209    if (result) {
4210        script->setError(ACC_INVALID_OPERATION);
4211    }
4212}
4213
4214extern "C"
4215void accGetScriptiv(ACCscript* script,
4216    ACCenum pname,
4217    ACCint * params) {
4218    switch (pname) {
4219        case ACC_INFO_LOG_LENGTH:
4220            *params = 0;
4221            break;
4222    }
4223}
4224
4225extern "C"
4226void accGetScriptInfoLog(ACCscript* script,
4227    ACCsizei maxLength,
4228    ACCsizei * length,
4229    ACCchar * infoLog) {
4230    char* message = script->compiler.getErrorMessage();
4231    int messageLength = strlen(message) + 1;
4232    if (length) {
4233        *length = messageLength;
4234    }
4235    if (infoLog && maxLength > 0) {
4236        int trimmedLength = maxLength < messageLength ?
4237                maxLength : messageLength;
4238        memcpy(infoLog, message, trimmedLength);
4239        infoLog[trimmedLength] = 0;
4240    }
4241}
4242
4243extern "C"
4244void accGetScriptLabel(ACCscript* script, const ACCchar * name,
4245                       ACCvoid ** address) {
4246    void* value = script->compiler.lookup(name);
4247    if (value) {
4248        *address = value;
4249    } else {
4250        script->setError(ACC_INVALID_VALUE);
4251    }
4252}
4253
4254extern "C"
4255void accGetPragmas(ACCscript* script, ACCsizei* actualStringCount,
4256                   ACCsizei maxStringCount, ACCchar** strings){
4257    script->compiler.getPragmas(actualStringCount, maxStringCount, strings);
4258}
4259
4260extern "C"
4261void accDisassemble(ACCscript* script) {
4262    script->compiler.disassemble(stderr);
4263}
4264
4265
4266} // namespace acc
4267
4268