acc.cpp revision b67b18f7c2217ae83e9ffc808cb9a58b233ec5bc
1/* 2 * Android "Almost" C Compiler. 3 * This is a compiler for a small subset of the C language, intended for use 4 * in scripting environments where speed and memory footprint are important. 5 * 6 * This code is based upon the "unobfuscated" version of the 7 * Obfuscated Tiny C compiler, see the file LICENSE for details. 8 * 9 */ 10 11#include <ctype.h> 12#include <dlfcn.h> 13#include <errno.h> 14#include <stdarg.h> 15#include <stdint.h> 16#include <stdio.h> 17#include <stdlib.h> 18#include <string.h> 19#include <cutils/hashmap.h> 20 21#if defined(__i386__) 22#include <sys/mman.h> 23#endif 24 25#if defined(__arm__) 26#include <unistd.h> 27#endif 28 29#if defined(__arm__) 30#define DEFAULT_ARM_CODEGEN 31#define PROVIDE_ARM_CODEGEN 32#elif defined(__i386__) 33#define DEFAULT_X86_CODEGEN 34#define PROVIDE_X86_CODEGEN 35#elif defined(__x86_64__) 36#define DEFAULT_X64_CODEGEN 37#define PROVIDE_X64_CODEGEN 38#endif 39 40 41#ifdef PROVIDE_ARM_CODEGEN 42#include "disassem.h" 43#endif 44 45#include <acc/acc.h> 46 47#define LOG_API(...) do {} while(0) 48// #define LOG_API(...) fprintf (stderr, __VA_ARGS__) 49// #define ENABLE_ARM_DISASSEMBLY 50 51// #define PROVIDE_TRACE_CODEGEN 52 53namespace acc { 54 55class ErrorSink { 56public: 57 void error(const char *fmt, ...) { 58 va_list ap; 59 va_start(ap, fmt); 60 verror(fmt, ap); 61 va_end(ap); 62 } 63 64 virtual void verror(const char* fmt, va_list ap) = 0; 65}; 66 67class Compiler : public ErrorSink { 68 class CodeBuf { 69 char* ind; // Output code pointer 70 char* pProgramBase; 71 ErrorSink* mErrorSink; 72 int mSize; 73 bool mOverflowed; 74 75 void release() { 76 if (pProgramBase != 0) { 77 free(pProgramBase); 78 pProgramBase = 0; 79 } 80 } 81 82 bool check(int n) { 83 int newSize = ind - pProgramBase + n; 84 bool overflow = newSize > mSize; 85 if (overflow && !mOverflowed) { 86 mOverflowed = true; 87 if (mErrorSink) { 88 mErrorSink->error("Code too large: %d bytes", newSize); 89 } 90 } 91 return overflow; 92 } 93 94 public: 95 CodeBuf() { 96 pProgramBase = 0; 97 ind = 0; 98 mErrorSink = 0; 99 mSize = 0; 100 mOverflowed = false; 101 } 102 103 ~CodeBuf() { 104 release(); 105 } 106 107 void init(int size) { 108 release(); 109 mSize = size; 110 pProgramBase = (char*) calloc(1, size); 111 ind = pProgramBase; 112 } 113 114 void setErrorSink(ErrorSink* pErrorSink) { 115 mErrorSink = pErrorSink; 116 } 117 118 int o4(int n) { 119 if(check(4)) { 120 return 0; 121 } 122 intptr_t result = (intptr_t) ind; 123 * (int*) ind = n; 124 ind += 4; 125 return result; 126 } 127 128 /* 129 * Output a byte. Handles all values, 0..ff. 130 */ 131 void ob(int n) { 132 if(check(1)) { 133 return; 134 } 135 *ind++ = n; 136 } 137 138 inline void* getBase() { 139 return (void*) pProgramBase; 140 } 141 142 intptr_t getSize() { 143 return ind - pProgramBase; 144 } 145 146 intptr_t getPC() { 147 return (intptr_t) ind; 148 } 149 }; 150 151 /** 152 * A code generator creates an in-memory program, generating the code on 153 * the fly. There is one code generator implementation for each supported 154 * architecture. 155 * 156 * The code generator implements the following abstract machine: 157 * R0 - the main accumulator. 158 * R1 - the secondary accumulator. 159 * FP - a frame pointer for accessing function arguments and local 160 * variables. 161 * SP - a stack pointer for storing intermediate results while evaluating 162 * expressions. The stack pointer grows downwards. 163 * 164 * The function calling convention is that all arguments are placed on the 165 * stack such that the first argument has the lowest address. 166 * After the call, the result is in R0. The caller is responsible for 167 * removing the arguments from the stack. 168 * The R0 and R1 registers are not saved across function calls. The 169 * FP and SP registers are saved. 170 */ 171 172 class CodeGenerator { 173 public: 174 CodeGenerator() { 175 mErrorSink = 0; 176 pCodeBuf = 0; 177 } 178 virtual ~CodeGenerator() {} 179 180 virtual void init(CodeBuf* pCodeBuf) { 181 this->pCodeBuf = pCodeBuf; 182 pCodeBuf->setErrorSink(mErrorSink); 183 } 184 185 virtual void setErrorSink(ErrorSink* pErrorSink) { 186 mErrorSink = pErrorSink; 187 if (pCodeBuf) { 188 pCodeBuf->setErrorSink(mErrorSink); 189 } 190 } 191 192 /* Emit a function prolog. 193 * argCount is the number of arguments. 194 * Save the old value of the FP. 195 * Set the new value of the FP. 196 * Convert from the native platform calling convention to 197 * our stack-based calling convention. This may require 198 * pushing arguments from registers to the stack. 199 * Allocate "N" bytes of stack space. N isn't known yet, so 200 * just emit the instructions for adjusting the stack, and return 201 * the address to patch up. The patching will be done in 202 * functionExit(). 203 * returns address to patch with local variable size. 204 */ 205 virtual int functionEntry(int argCount) = 0; 206 207 /* Emit a function epilog. 208 * Restore the old SP and FP register values. 209 * Return to the calling function. 210 * argCount - the number of arguments to the function. 211 * localVariableAddress - returned from functionEntry() 212 * localVariableSize - the size in bytes of the local variables. 213 */ 214 virtual void functionExit(int argCount, int localVariableAddress, 215 int localVariableSize) = 0; 216 217 /* load immediate value to R0 */ 218 virtual void li(int t) = 0; 219 220 /* Jump to a target, and return the address of the word that 221 * holds the target data, in case it needs to be fixed up later. 222 */ 223 virtual int gjmp(int t) = 0; 224 225 /* Test R0 and jump to a target if the test succeeds. 226 * l = 0: je, l == 1: jne 227 * Return the address of the word that holds the targed data, in 228 * case it needs to be fixed up later. 229 */ 230 virtual int gtst(bool l, int t) = 0; 231 232 /* Compare R1 against R0, and store the boolean result in R0. 233 * op specifies the comparison. 234 */ 235 virtual void gcmp(int op) = 0; 236 237 /* Perform the arithmetic op specified by op. R1 is the 238 * left argument, R0 is the right argument. 239 */ 240 virtual void genOp(int op) = 0; 241 242 /* Set R1 to 0. 243 */ 244 virtual void clearR1() = 0; 245 246 /* Push R0 onto the stack. 247 */ 248 virtual void pushR0() = 0; 249 250 /* Pop R1 off of the stack. 251 */ 252 virtual void popR1() = 0; 253 254 /* Store R0 to the address stored in R1. 255 * isInt is true if a whole 4-byte integer value 256 * should be stored, otherwise a 1-byte character 257 * value should be stored. 258 */ 259 virtual void storeR0ToR1(bool isInt) = 0; 260 261 /* Load R0 from the address stored in R0. 262 * isInt is true if a whole 4-byte integer value 263 * should be loaded, otherwise a 1-byte character 264 * value should be loaded. 265 */ 266 virtual void loadR0FromR0(bool isInt) = 0; 267 268 /* Load the absolute address of a variable to R0. 269 * If ea <= LOCAL, then this is a local variable, or an 270 * argument, addressed relative to FP. 271 * else it is an absolute global address. 272 */ 273 virtual void leaR0(int ea) = 0; 274 275 /* Store R0 to a variable. 276 * If ea <= LOCAL, then this is a local variable, or an 277 * argument, addressed relative to FP. 278 * else it is an absolute global address. 279 */ 280 virtual void storeR0(int ea) = 0; 281 282 /* load R0 from a variable. 283 * If ea <= LOCAL, then this is a local variable, or an 284 * argument, addressed relative to FP. 285 * else it is an absolute global address. 286 * If isIncDec is true, then the stored variable's value 287 * should be post-incremented or post-decremented, based 288 * on the value of op. 289 */ 290 virtual void loadR0(int ea, bool isIncDec, int op) = 0; 291 292 /* Emit code to adjust the stack for a function call. Return the 293 * label for the address of the instruction that adjusts the 294 * stack size. This will be passed as argument "a" to 295 * endFunctionCallArguments. 296 */ 297 virtual int beginFunctionCallArguments() = 0; 298 299 /* Emit code to store R0 to the stack at byte offset l. 300 */ 301 virtual void storeR0ToArg(int l) = 0; 302 303 /* Patch the function call preamble. 304 * a is the address returned from beginFunctionCallArguments 305 * l is the number of bytes the arguments took on the stack. 306 * Typically you would also emit code to convert the argument 307 * list into whatever the native function calling convention is. 308 * On ARM for example you would pop the first 5 arguments into 309 * R0..R4 310 */ 311 virtual void endFunctionCallArguments(int a, int l) = 0; 312 313 /* Emit a call to an unknown function. The argument "symbol" needs to 314 * be stored in the location where the address should go. It forms 315 * a chain. The address will be patched later. 316 * Return the address of the word that has to be patched. 317 */ 318 virtual int callForward(int symbol) = 0; 319 320 /* Call a function using PC-relative addressing. t is the PC-relative 321 * address of the function. It has already been adjusted for the 322 * architectural jump offset, so just store it as-is. 323 */ 324 virtual void callRelative(int t) = 0; 325 326 /* Call a function pointer. L is the number of bytes the arguments 327 * take on the stack. The address of the function is stored at 328 * location SP + l. 329 */ 330 virtual void callIndirect(int l) = 0; 331 332 /* Adjust SP after returning from a function call. l is the 333 * number of bytes of arguments stored on the stack. isIndirect 334 * is true if this was an indirect call. (In which case the 335 * address of the function is stored at location SP + l.) 336 */ 337 virtual void adjustStackAfterCall(int l, bool isIndirect) = 0; 338 339 /* Print a disassembly of the assembled code to out. Return 340 * non-zero if there is an error. 341 */ 342 virtual int disassemble(FILE* out) = 0; 343 344 /* Generate a symbol at the current PC. t is the head of a 345 * linked list of addresses to patch. 346 */ 347 virtual void gsym(int t) = 0; 348 349 /* 350 * Do any cleanup work required at the end of a compile. 351 * For example, an instruction cache might need to be 352 * invalidated. 353 * Return non-zero if there is an error. 354 */ 355 virtual int finishCompile() = 0; 356 357 /** 358 * Adjust relative branches by this amount. 359 */ 360 virtual int jumpOffset() = 0; 361 362 protected: 363 /* 364 * Output a byte. Handles all values, 0..ff. 365 */ 366 void ob(int n) { 367 pCodeBuf->ob(n); 368 } 369 370 intptr_t o4(int data) { 371 return pCodeBuf->o4(data); 372 } 373 374 intptr_t getBase() { 375 return (intptr_t) pCodeBuf->getBase(); 376 } 377 378 intptr_t getPC() { 379 return pCodeBuf->getPC(); 380 } 381 382 intptr_t getSize() { 383 return pCodeBuf->getSize(); 384 } 385 386 void error(const char* fmt,...) { 387 va_list ap; 388 va_start(ap, fmt); 389 mErrorSink->verror(fmt, ap); 390 va_end(ap); 391 } 392 private: 393 CodeBuf* pCodeBuf; 394 ErrorSink* mErrorSink; 395 }; 396 397#ifdef PROVIDE_ARM_CODEGEN 398 399 class ARMCodeGenerator : public CodeGenerator { 400 public: 401 ARMCodeGenerator() {} 402 virtual ~ARMCodeGenerator() {} 403 404 /* returns address to patch with local variable size 405 */ 406 virtual int functionEntry(int argCount) { 407 LOG_API("functionEntry(%d);\n", argCount); 408 // sp -> arg4 arg5 ... 409 // Push our register-based arguments back on the stack 410 if (argCount > 0) { 411 int regArgCount = argCount <= 4 ? argCount : 4; 412 o4(0xE92D0000 | ((1 << argCount) - 1)); // stmfd sp!, {} 413 } 414 // sp -> arg0 arg1 ... 415 o4(0xE92D4800); // stmfd sp!, {fp, lr} 416 // sp, fp -> oldfp, retadr, arg0 arg1 .... 417 o4(0xE1A0B00D); // mov fp, sp 418 return o4(0xE24DD000); // sub sp, sp, # <local variables> 419 } 420 421 virtual void functionExit(int argCount, int localVariableAddress, int localVariableSize) { 422 LOG_API("functionExit(%d, %d, %d);\n", argCount, localVariableAddress, localVariableSize); 423 // Patch local variable allocation code: 424 if (localVariableSize < 0 || localVariableSize > 255) { 425 error("localVariables out of range: %d", localVariableSize); 426 } 427 *(char*) (localVariableAddress) = localVariableSize; 428 429 // sp -> locals .... fp -> oldfp, retadr, arg0, arg1, ... 430 o4(0xE1A0E00B); // mov lr, fp 431 o4(0xE59BB000); // ldr fp, [fp] 432 o4(0xE28ED004); // add sp, lr, #4 433 // sp -> retadr, arg0, ... 434 o4(0xE8BD4000); // ldmfd sp!, {lr} 435 // sp -> arg0 .... 436 if (argCount > 0) { 437 // We store the PC into the lr so we can adjust the sp before 438 // returning. We need to pull off the registers we pushed 439 // earlier. We don't need to actually store them anywhere, 440 // just adjust the stack. 441 int regArgCount = argCount <= 4 ? argCount : 4; 442 o4(0xE28DD000 | (regArgCount << 2)); // add sp, sp, #argCount << 2 443 } 444 o4(0xE12FFF1E); // bx lr 445 } 446 447 /* load immediate value */ 448 virtual void li(int t) { 449 LOG_API("li(%d);\n", t); 450 if (t >= 0 && t < 255) { 451 o4(0xE3A00000 + t); // mov r0, #0 452 } else if (t >= -256 && t < 0) { 453 // mvn means move constant ^ ~0 454 o4(0xE3E00001 - t); // mvn r0, #0 455 } else { 456 o4(0xE51F0000); // ldr r0, .L3 457 o4(0xEA000000); // b .L99 458 o4(t); // .L3: .word 0 459 // .L99: 460 } 461 } 462 463 virtual int gjmp(int t) { 464 LOG_API("gjmp(%d);\n", t); 465 return o4(0xEA000000 | encodeAddress(t)); // b .L33 466 } 467 468 /* l = 0: je, l == 1: jne */ 469 virtual int gtst(bool l, int t) { 470 LOG_API("gtst(%d, %d);\n", l, t); 471 o4(0xE3500000); // cmp r0,#0 472 int branch = l ? 0x1A000000 : 0x0A000000; // bne : beq 473 return o4(branch | encodeAddress(t)); 474 } 475 476 virtual void gcmp(int op) { 477 LOG_API("gcmp(%d);\n", op); 478 o4(0xE1510000); // cmp r1, r1 479 switch(op) { 480 case OP_EQUALS: 481 o4(0x03A00001); // moveq r0,#1 482 o4(0x13A00000); // movne r0,#0 483 break; 484 case OP_NOT_EQUALS: 485 o4(0x03A00000); // moveq r0,#0 486 o4(0x13A00001); // movne r0,#1 487 break; 488 case OP_LESS_EQUAL: 489 o4(0xD3A00001); // movle r0,#1 490 o4(0xC3A00000); // movgt r0,#0 491 break; 492 case OP_GREATER: 493 o4(0xD3A00000); // movle r0,#0 494 o4(0xC3A00001); // movgt r0,#1 495 break; 496 case OP_GREATER_EQUAL: 497 o4(0xA3A00001); // movge r0,#1 498 o4(0xB3A00000); // movlt r0,#0 499 break; 500 case OP_LESS: 501 o4(0xA3A00000); // movge r0,#0 502 o4(0xB3A00001); // movlt r0,#1 503 break; 504 default: 505 error("Unknown comparison op %d", op); 506 break; 507 } 508 } 509 510 virtual void genOp(int op) { 511 LOG_API("genOp(%d);\n", op); 512 switch(op) { 513 case OP_MUL: 514 o4(0x0E0000091); // mul r0,r1,r0 515 break; 516 case OP_DIV: 517 callRuntime(runtime_DIV); 518 break; 519 case OP_MOD: 520 callRuntime(runtime_MOD); 521 break; 522 case OP_PLUS: 523 o4(0xE0810000); // add r0,r1,r0 524 break; 525 case OP_MINUS: 526 o4(0xE0410000); // sub r0,r1,r0 527 break; 528 case OP_SHIFT_LEFT: 529 o4(0xE1A00011); // lsl r0,r1,r0 530 break; 531 case OP_SHIFT_RIGHT: 532 o4(0xE1A00051); // asr r0,r1,r0 533 break; 534 case OP_BIT_AND: 535 o4(0xE0010000); // and r0,r1,r0 536 break; 537 case OP_BIT_XOR: 538 o4(0xE0210000); // eor r0,r1,r0 539 break; 540 case OP_BIT_OR: 541 o4(0xE1810000); // orr r0,r1,r0 542 break; 543 case OP_BIT_NOT: 544 o4(0xE1E00000); // mvn r0, r0 545 break; 546 default: 547 error("Unimplemented op %d\n", op); 548 break; 549 } 550#if 0 551 o(decodeOp(op)); 552 if (op == OP_MOD) 553 o(0x92); /* xchg %edx, %eax */ 554#endif 555 } 556 557 virtual void clearR1() { 558 LOG_API("clearR1();\n"); 559 o4(0xE3A01000); // mov r1, #0 560 } 561 562 virtual void pushR0() { 563 LOG_API("pushR0();\n"); 564 o4(0xE92D0001); // stmfd sp!,{r0} 565 } 566 567 virtual void popR1() { 568 LOG_API("popR1();\n"); 569 o4(0xE8BD0002); // ldmfd sp!,{r1} 570 } 571 572 virtual void storeR0ToR1(bool isInt) { 573 LOG_API("storeR0ToR1(%d);\n", isInt); 574 if (isInt) { 575 o4(0xE5810000); // str r0, [r1] 576 } else { 577 o4(0xE5C10000); // strb r0, [r1] 578 } 579 } 580 581 virtual void loadR0FromR0(bool isInt) { 582 LOG_API("loadR0FromR0(%d);\n", isInt); 583 if (isInt) 584 o4(0xE5900000); // ldr r0, [r0] 585 else 586 o4(0xE5D00000); // ldrb r0, [r0] 587 } 588 589 virtual void leaR0(int ea) { 590 LOG_API("leaR0(%d);\n", ea); 591 if (ea < LOCAL) { 592 // Local, fp relative 593 if (ea < -1023 || ea > 1023 || ((ea & 3) != 0)) { 594 error("Offset out of range: %08x", ea); 595 } 596 if (ea < 0) { 597 o4(0xE24B0F00 | (0xff & ((-ea) >> 2))); // sub r0, fp, #ea 598 } else { 599 o4(0xE28B0F00 | (0xff & (ea >> 2))); // add r0, fp, #ea 600 } 601 } else { 602 // Global, absolute. 603 o4(0xE59F0000); // ldr r0, .L1 604 o4(0xEA000000); // b .L99 605 o4(ea); // .L1: .word 0 606 // .L99: 607 } 608 } 609 610 virtual void storeR0(int ea) { 611 LOG_API("storeR0(%d);\n", ea); 612 if (ea < LOCAL) { 613 // Local, fp relative 614 if (ea < -4095 || ea > 4095) { 615 error("Offset out of range: %08x", ea); 616 } 617 if (ea < 0) { 618 o4(0xE50B0000 | (0xfff & (-ea))); // str r0, [fp,#-ea] 619 } else { 620 o4(0xE58B0000 | (0xfff & ea)); // str r0, [fp,#ea] 621 } 622 } else{ 623 // Global, absolute 624 o4(0xE59F1000); // ldr r1, .L1 625 o4(0xEA000000); // b .L99 626 o4(ea); // .L1: .word 0 627 o4(0xE5810000); // .L99: str r0, [r1] 628 } 629 } 630 631 virtual void loadR0(int ea, bool isIncDec, int op) { 632 LOG_API("loadR0(%d, %d, %d);\n", ea, isIncDec, op); 633 if (ea < LOCAL) { 634 // Local, fp relative 635 if (ea < -4095 || ea > 4095) { 636 error("Offset out of range: %08x", ea); 637 } 638 if (ea < 0) { 639 o4(0xE51B0000 | (0xfff & (-ea))); // ldr r0, [fp,#-ea] 640 } else { 641 o4(0xE59B0000 | (0xfff & ea)); // ldr r0, [fp,#ea] 642 } 643 } else { 644 // Global, absolute 645 o4(0xE59F2000); // ldr r2, .L1 646 o4(0xEA000000); // b .L99 647 o4(ea); // .L1: .word ea 648 o4(0xE5920000); // .L99: ldr r0, [r2] 649 } 650 651 if (isIncDec) { 652 switch (op) { 653 case OP_INCREMENT: 654 o4(0xE2801001); // add r1, r0, #1 655 break; 656 case OP_DECREMENT: 657 o4(0xE2401001); // sub r1, r0, #1 658 break; 659 default: 660 error("unknown opcode: %d", op); 661 } 662 if (ea < LOCAL) { 663 // Local, fp relative 664 // Don't need range check, was already checked above 665 if (ea < 0) { 666 o4(0xE50B1000 | (0xfff & (-ea))); // str r1, [fp,#-ea] 667 } else { 668 o4(0xE58B1000 | (0xfff & ea)); // str r1, [fp,#ea] 669 } 670 } else{ 671 // Global, absolute 672 // r2 is already set up from before. 673 o4(0xE5821000); // str r1, [r2] 674 } 675 } 676 } 677 678 virtual int beginFunctionCallArguments() { 679 LOG_API("beginFunctionCallArguments();\n"); 680 return o4(0xE24DDF00); // Placeholder 681 } 682 683 virtual void storeR0ToArg(int l) { 684 LOG_API("storeR0ToArg(%d);\n", l); 685 if (l < 0 || l > 4096-4) { 686 error("l out of range for stack offset: 0x%08x", l); 687 } 688 o4(0xE58D0000 + l); // str r0, [sp, #4] 689 } 690 691 virtual void endFunctionCallArguments(int a, int l) { 692 LOG_API("endFunctionCallArguments(0x%08x, %d);\n", a, l); 693 if (l < 0 || l > 0x3FC) { 694 error("L out of range for stack adjustment: 0x%08x", l); 695 } 696 * (int*) a = 0xE24DDF00 | (l >> 2); // sub sp, sp, #0 << 2 697 int argCount = l >> 2; 698 if (argCount > 0) { 699 int regArgCount = argCount > 4 ? 4 : argCount; 700 o4(0xE8BD0000 | ((1 << regArgCount) - 1)); // ldmfd sp!,{} 701 } 702 } 703 704 virtual int callForward(int symbol) { 705 LOG_API("callForward(%d);\n", symbol); 706 // Forward calls are always short (local) 707 return o4(0xEB000000 | encodeAddress(symbol)); 708 } 709 710 virtual void callRelative(int t) { 711 LOG_API("callRelative(%d);\n", t); 712 int abs = t + getPC() + jumpOffset(); 713 LOG_API("abs=%d (0x%08x)\n", abs, abs); 714 if (t >= - (1 << 25) && t < (1 << 25)) { 715 o4(0xEB000000 | encodeAddress(t)); 716 } else { 717 // Long call. 718 o4(0xE59FC000); // ldr r12, .L1 719 o4(0xEA000000); // b .L99 720 o4(t - 12); // .L1: .word 0 721 o4(0xE08CC00F); // .L99: add r12,pc 722 o4(0xE12FFF3C); // blx r12 723 } 724 } 725 726 virtual void callIndirect(int l) { 727 LOG_API("callIndirect(%d);\n", l); 728 int argCount = l >> 2; 729 int poppedArgs = argCount > 4 ? 4 : argCount; 730 int adjustedL = l - (poppedArgs << 2); 731 if (adjustedL < 0 || adjustedL > 4096-4) { 732 error("l out of range for stack offset: 0x%08x", l); 733 } 734 o4(0xE59DC000 | (0xfff & adjustedL)); // ldr r12, [sp,#adjustedL] 735 o4(0xE12FFF3C); // blx r12 736 } 737 738 virtual void adjustStackAfterCall(int l, bool isIndirect) { 739 LOG_API("adjustStackAfterCall(%d, %d);\n", l, isIndirect); 740 int argCount = l >> 2; 741 int stackArgs = argCount > 4 ? argCount - 4 : 0; 742 int stackUse = stackArgs + (isIndirect ? 1 : 0); 743 if (stackUse) { 744 if (stackUse < 0 || stackUse > 255) { 745 error("L out of range for stack adjustment: 0x%08x", l); 746 } 747 o4(0xE28DDF00 | stackUse); // add sp, sp, #stackUse << 2 748 } 749 } 750 751 virtual int jumpOffset() { 752 return 8; 753 } 754 755 /* output a symbol and patch all calls to it */ 756 virtual void gsym(int t) { 757 LOG_API("gsym(0x%x)\n", t); 758 int n; 759 int base = getBase(); 760 int pc = getPC(); 761 LOG_API("pc = 0x%x\n", pc); 762 while (t) { 763 int data = * (int*) t; 764 int decodedOffset = ((BRANCH_REL_ADDRESS_MASK & data) << 2); 765 if (decodedOffset == 0) { 766 n = 0; 767 } else { 768 n = base + decodedOffset; /* next value */ 769 } 770 *(int *) t = (data & ~BRANCH_REL_ADDRESS_MASK) 771 | encodeRelAddress(pc - t - 8); 772 t = n; 773 } 774 } 775 776 virtual int finishCompile() { 777#if defined(__arm__) 778 const long base = long(getBase()); 779 const long curr = long(getPC()); 780 int err = cacheflush(base, curr, 0); 781 return err; 782#else 783 return 0; 784#endif 785 } 786 787 virtual int disassemble(FILE* out) { 788#ifdef ENABLE_ARM_DISASSEMBLY 789 disasmOut = out; 790 disasm_interface_t di; 791 di.di_readword = disassemble_readword; 792 di.di_printaddr = disassemble_printaddr; 793 di.di_printf = disassemble_printf; 794 795 int base = getBase(); 796 int pc = getPC(); 797 for(int i = base; i < pc; i += 4) { 798 fprintf(out, "%08x: %08x ", i, *(int*) i); 799 ::disasm(&di, i, 0); 800 } 801#endif 802 return 0; 803 } 804 805 private: 806 static FILE* disasmOut; 807 808 static u_int 809 disassemble_readword(u_int address) 810 { 811 return(*((u_int *)address)); 812 } 813 814 static void 815 disassemble_printaddr(u_int address) 816 { 817 fprintf(disasmOut, "0x%08x", address); 818 } 819 820 static void 821 disassemble_printf(const char *fmt, ...) { 822 va_list ap; 823 va_start(ap, fmt); 824 vfprintf(disasmOut, fmt, ap); 825 va_end(ap); 826 } 827 828 static const int BRANCH_REL_ADDRESS_MASK = 0x00ffffff; 829 830 /** Encode a relative address that might also be 831 * a label. 832 */ 833 int encodeAddress(int value) { 834 int base = getBase(); 835 if (value >= base && value <= getPC() ) { 836 // This is a label, encode it relative to the base. 837 value = value - base; 838 } 839 return encodeRelAddress(value); 840 } 841 842 int encodeRelAddress(int value) { 843 return BRANCH_REL_ADDRESS_MASK & (value >> 2); 844 } 845 846 typedef int (*int2FnPtr)(int a, int b); 847 void callRuntime(int2FnPtr fn) { 848 o4(0xE59F2000); // ldr r2, .L1 849 o4(0xEA000000); // b .L99 850 o4((int) fn); //.L1: .word fn 851 o4(0xE12FFF32); //.L99: blx r2 852 } 853 854 static int runtime_DIV(int a, int b) { 855 return b / a; 856 } 857 858 static int runtime_MOD(int a, int b) { 859 return b % a; 860 } 861 }; 862 863#endif // PROVIDE_ARM_CODEGEN 864 865#ifdef PROVIDE_X86_CODEGEN 866 867 class X86CodeGenerator : public CodeGenerator { 868 public: 869 X86CodeGenerator() {} 870 virtual ~X86CodeGenerator() {} 871 872 /* returns address to patch with local variable size 873 */ 874 virtual int functionEntry(int argCount) { 875 o(0xe58955); /* push %ebp, mov %esp, %ebp */ 876 return oad(0xec81, 0); /* sub $xxx, %esp */ 877 } 878 879 virtual void functionExit(int argCount, int localVariableAddress, int localVariableSize) { 880 o(0xc3c9); /* leave, ret */ 881 *(int *) localVariableAddress = localVariableSize; /* save local variables */ 882 } 883 884 /* load immediate value */ 885 virtual void li(int t) { 886 oad(0xb8, t); /* mov $xx, %eax */ 887 } 888 889 virtual int gjmp(int t) { 890 return psym(0xe9, t); 891 } 892 893 /* l = 0: je, l == 1: jne */ 894 virtual int gtst(bool l, int t) { 895 o(0x0fc085); /* test %eax, %eax, je/jne xxx */ 896 return psym(0x84 + l, t); 897 } 898 899 virtual void gcmp(int op) { 900 int t = decodeOp(op); 901 o(0xc139); /* cmp %eax,%ecx */ 902 li(0); 903 o(0x0f); /* setxx %al */ 904 o(t + 0x90); 905 o(0xc0); 906 } 907 908 virtual void genOp(int op) { 909 o(decodeOp(op)); 910 if (op == OP_MOD) 911 o(0x92); /* xchg %edx, %eax */ 912 } 913 914 virtual void clearR1() { 915 oad(0xb9, 0); /* movl $0, %ecx */ 916 } 917 918 virtual void pushR0() { 919 o(0x50); /* push %eax */ 920 } 921 922 virtual void popR1() { 923 o(0x59); /* pop %ecx */ 924 } 925 926 virtual void storeR0ToR1(bool isInt) { 927 o(0x0188 + isInt); /* movl %eax/%al, (%ecx) */ 928 } 929 930 virtual void loadR0FromR0(bool isInt) { 931 if (isInt) 932 o(0x8b); /* mov (%eax), %eax */ 933 else 934 o(0xbe0f); /* movsbl (%eax), %eax */ 935 ob(0); /* add zero in code */ 936 } 937 938 virtual void leaR0(int ea) { 939 gmov(10, ea); /* leal EA, %eax */ 940 } 941 942 virtual void storeR0(int ea) { 943 gmov(6, ea); /* mov %eax, EA */ 944 } 945 946 virtual void loadR0(int ea, bool isIncDec, int op) { 947 gmov(8, ea); /* mov EA, %eax */ 948 if (isIncDec) { 949 /* Implement post-increment or post decrement. 950 */ 951 gmov(0, ea); /* 83 ADD */ 952 o(decodeOp(op)); 953 } 954 } 955 956 virtual int beginFunctionCallArguments() { 957 return oad(0xec81, 0); /* sub $xxx, %esp */ 958 } 959 960 virtual void storeR0ToArg(int l) { 961 oad(0x248489, l); /* movl %eax, xxx(%esp) */ 962 } 963 964 virtual void endFunctionCallArguments(int a, int l) { 965 * (int*) a = l; 966 } 967 968 virtual int callForward(int symbol) { 969 return psym(0xe8, symbol); /* call xxx */ 970 } 971 972 virtual void callRelative(int t) { 973 psym(0xe8, t); /* call xxx */ 974 } 975 976 virtual void callIndirect(int l) { 977 oad(0x2494ff, l); /* call *xxx(%esp) */ 978 } 979 980 virtual void adjustStackAfterCall(int l, bool isIndirect) { 981 if (isIndirect) { 982 l += 4; 983 } 984 oad(0xc481, l); /* add $xxx, %esp */ 985 } 986 987 virtual int jumpOffset() { 988 return 5; 989 } 990 991 virtual int disassemble(FILE* out) { 992 return 0; 993 } 994 995 /* output a symbol and patch all calls to it */ 996 virtual void gsym(int t) { 997 int n; 998 int pc = getPC(); 999 while (t) { 1000 n = *(int *) t; /* next value */ 1001 *(int *) t = pc - t - 4; 1002 t = n; 1003 } 1004 } 1005 1006 virtual int finishCompile() { 1007 size_t pagesize = 4096; 1008 size_t base = (size_t) getBase() & ~ (pagesize - 1); 1009 size_t top = ((size_t) getPC() + pagesize - 1) & ~ (pagesize - 1); 1010 int err = mprotect((void*) base, top - base, PROT_READ | PROT_WRITE | PROT_EXEC); 1011 if (err) { 1012 error("mprotect() failed: %d", errno); 1013 } 1014 return err; 1015 } 1016 1017 private: 1018 1019 /** Output 1 to 4 bytes. 1020 * 1021 */ 1022 void o(int n) { 1023 /* cannot use unsigned, so we must do a hack */ 1024 while (n && n != -1) { 1025 ob(n & 0xff); 1026 n = n >> 8; 1027 } 1028 } 1029 1030 /* psym is used to put an instruction with a data field which is a 1031 reference to a symbol. It is in fact the same as oad ! */ 1032 int psym(int n, int t) { 1033 return oad(n, t); 1034 } 1035 1036 /* instruction + address */ 1037 int oad(int n, int t) { 1038 o(n); 1039 int result = getPC(); 1040 o4(t); 1041 return result; 1042 } 1043 1044 1045 static const int operatorHelper[]; 1046 1047 int decodeOp(int op) { 1048 if (op < 0 || op > OP_COUNT) { 1049 error("Out-of-range operator: %d\n", op); 1050 op = 0; 1051 } 1052 return operatorHelper[op]; 1053 } 1054 1055 void gmov(int l, int t) { 1056 o(l + 0x83); 1057 oad((t > -LOCAL && t < LOCAL) << 7 | 5, t); 1058 } 1059 }; 1060 1061#endif // PROVIDE_X86_CODEGEN 1062 1063#ifdef PROVIDE_TRACE_CODEGEN 1064 class TraceCodeGenerator : public CodeGenerator { 1065 private: 1066 CodeGenerator* mpBase; 1067 1068 public: 1069 TraceCodeGenerator(CodeGenerator* pBase) { 1070 mpBase = pBase; 1071 } 1072 1073 virtual ~TraceCodeGenerator() { 1074 delete mpBase; 1075 } 1076 1077 virtual void init(CodeBuf* pCodeBuf) { 1078 mpBase->init(pCodeBuf); 1079 } 1080 1081 void setErrorSink(ErrorSink* pErrorSink) { 1082 mpBase->setErrorSink(pErrorSink); 1083 } 1084 1085 /* returns address to patch with local variable size 1086 */ 1087 virtual int functionEntry(int argCount) { 1088 int result = mpBase->functionEntry(argCount); 1089 fprintf(stderr, "functionEntry(%d) -> %d\n", argCount, result); 1090 return result; 1091 } 1092 1093 virtual void functionExit(int argCount, int localVariableAddress, int localVariableSize) { 1094 fprintf(stderr, "functionExit(%d, %d, %d)\n", 1095 argCount, localVariableAddress, localVariableSize); 1096 mpBase->functionExit(argCount, localVariableAddress, localVariableSize); 1097 } 1098 1099 /* load immediate value */ 1100 virtual void li(int t) { 1101 fprintf(stderr, "li(%d)\n", t); 1102 mpBase->li(t); 1103 } 1104 1105 virtual int gjmp(int t) { 1106 int result = mpBase->gjmp(t); 1107 fprintf(stderr, "gjmp(%d) = %d\n", t, result); 1108 return result; 1109 } 1110 1111 /* l = 0: je, l == 1: jne */ 1112 virtual int gtst(bool l, int t) { 1113 int result = mpBase->gtst(l, t); 1114 fprintf(stderr, "gtst(%d,%d) = %d\n", l, t, result); 1115 return result; 1116 } 1117 1118 virtual void gcmp(int op) { 1119 fprintf(stderr, "gcmp(%d)\n", op); 1120 mpBase->gcmp(op); 1121 } 1122 1123 virtual void genOp(int op) { 1124 fprintf(stderr, "genOp(%d)\n", op); 1125 mpBase->genOp(op); 1126 } 1127 1128 virtual void clearR1() { 1129 fprintf(stderr, "clearR1()\n"); 1130 mpBase->clearR1(); 1131 } 1132 1133 virtual void pushR0() { 1134 fprintf(stderr, "pushR0()\n"); 1135 mpBase->pushR0(); 1136 } 1137 1138 virtual void popR1() { 1139 fprintf(stderr, "popR1()\n"); 1140 mpBase->popR1(); 1141 } 1142 1143 virtual void storeR0ToR1(bool isInt) { 1144 fprintf(stderr, "storeR0ToR1(%d)\n", isInt); 1145 mpBase->storeR0ToR1(isInt); 1146 } 1147 1148 virtual void loadR0FromR0(bool isInt) { 1149 fprintf(stderr, "loadR0FromR0(%d)\n", isInt); 1150 mpBase->loadR0FromR0(isInt); 1151 } 1152 1153 virtual void leaR0(int ea) { 1154 fprintf(stderr, "leaR0(%d)\n", ea); 1155 mpBase->leaR0(ea); 1156 } 1157 1158 virtual void storeR0(int ea) { 1159 fprintf(stderr, "storeR0(%d)\n", ea); 1160 mpBase->storeR0(ea); 1161 } 1162 1163 virtual void loadR0(int ea, bool isIncDec, int op) { 1164 fprintf(stderr, "loadR0(%d, %d, %d)\n", ea, isIncDec, op); 1165 mpBase->loadR0(ea, isIncDec, op); 1166 } 1167 1168 virtual int beginFunctionCallArguments() { 1169 int result = mpBase->beginFunctionCallArguments(); 1170 fprintf(stderr, "beginFunctionCallArguments() = %d\n", result); 1171 return result; 1172 } 1173 1174 virtual void storeR0ToArg(int l) { 1175 fprintf(stderr, "storeR0ToArg(%d)\n", l); 1176 mpBase->storeR0ToArg(l); 1177 } 1178 1179 virtual void endFunctionCallArguments(int a, int l) { 1180 fprintf(stderr, "endFunctionCallArguments(%d, %d)\n", a, l); 1181 mpBase->endFunctionCallArguments(a, l); 1182 } 1183 1184 virtual int callForward(int symbol) { 1185 int result = mpBase->callForward(symbol); 1186 fprintf(stderr, "callForward(%d) = %d\n", symbol, result); 1187 return result; 1188 } 1189 1190 virtual void callRelative(int t) { 1191 fprintf(stderr, "callRelative(%d)\n", t); 1192 mpBase->callRelative(t); 1193 } 1194 1195 virtual void callIndirect(int l) { 1196 fprintf(stderr, "callIndirect(%d)\n", l); 1197 mpBase->callIndirect(l); 1198 } 1199 1200 virtual void adjustStackAfterCall(int l, bool isIndirect) { 1201 fprintf(stderr, "adjustStackAfterCall(%d, %d)\n", l, isIndirect); 1202 mpBase->adjustStackAfterCall(l, isIndirect); 1203 } 1204 1205 virtual int jumpOffset() { 1206 return mpBase->jumpOffset(); 1207 } 1208 1209 virtual int disassemble(FILE* out) { 1210 return mpBase->disassemble(out); 1211 } 1212 1213 /* output a symbol and patch all calls to it */ 1214 virtual void gsym(int t) { 1215 fprintf(stderr, "gsym(%d)\n", t); 1216 mpBase->gsym(t); 1217 } 1218 1219 virtual int finishCompile() { 1220 int result = mpBase->finishCompile(); 1221 fprintf(stderr, "finishCompile() = %d\n", result); 1222 return result; 1223 } 1224 }; 1225 1226#endif // PROVIDE_TRACE_CODEGEN 1227 1228 class InputStream { 1229 public: 1230 int getChar() { 1231 if (bumpLine) { 1232 line++; 1233 bumpLine = false; 1234 } 1235 int ch = get(); 1236 if (ch == '\n') { 1237 bumpLine = true; 1238 } 1239 return ch; 1240 } 1241 int getLine() { 1242 return line; 1243 } 1244 protected: 1245 InputStream() : 1246 line(1), bumpLine(false) { 1247 } 1248 private: 1249 virtual int get() = 0; 1250 int line; 1251 bool bumpLine; 1252 }; 1253 1254 class FileInputStream : public InputStream { 1255 public: 1256 FileInputStream(FILE* in) : f(in) {} 1257 private: 1258 virtual int get() { return fgetc(f); } 1259 FILE* f; 1260 }; 1261 1262 class TextInputStream : public InputStream { 1263 public: 1264 TextInputStream(const char* text, size_t textLength) 1265 : pText(text), mTextLength(textLength), mPosition(0) { 1266 } 1267 1268 private: 1269 virtual int get() { 1270 return mPosition < mTextLength ? pText[mPosition++] : EOF; 1271 } 1272 1273 const char* pText; 1274 size_t mTextLength; 1275 size_t mPosition; 1276 }; 1277 1278 class String { 1279 public: 1280 String() { 1281 mpBase = 0; 1282 mUsed = 0; 1283 mSize = 0; 1284 } 1285 1286 String(const char* item, int len, bool adopt) { 1287 if (len < 0) { 1288 len = strlen(item); 1289 } 1290 if (adopt) { 1291 mpBase = (char*) item; 1292 mUsed = len; 1293 mSize = len + 1; 1294 } else { 1295 mpBase = 0; 1296 mUsed = 0; 1297 mSize = 0; 1298 appendBytes(item, len); 1299 } 1300 } 1301 1302 String(const String& other) { 1303 mpBase = 0; 1304 mUsed = 0; 1305 mSize = 0; 1306 appendBytes(other.getUnwrapped(), other.len()); 1307 } 1308 1309 ~String() { 1310 if (mpBase) { 1311 free(mpBase); 1312 } 1313 } 1314 1315 inline char* getUnwrapped() const { 1316 return mpBase; 1317 } 1318 1319 void clear() { 1320 mUsed = 0; 1321 if (mSize > 0) { 1322 mpBase[0] = 0; 1323 } 1324 } 1325 1326 void appendCStr(const char* s) { 1327 appendBytes(s, strlen(s)); 1328 } 1329 1330 void appendBytes(const char* s, int n) { 1331 memcpy(ensure(n), s, n + 1); 1332 } 1333 1334 void append(char c) { 1335 * ensure(1) = c; 1336 } 1337 1338 char* orphan() { 1339 char* result = mpBase; 1340 mpBase = 0; 1341 mUsed = 0; 1342 mSize = 0; 1343 return result; 1344 } 1345 1346 void printf(const char* fmt,...) { 1347 va_list ap; 1348 va_start(ap, fmt); 1349 vprintf(fmt, ap); 1350 va_end(ap); 1351 } 1352 1353 void vprintf(const char* fmt, va_list ap) { 1354 char* temp; 1355 int numChars = vasprintf(&temp, fmt, ap); 1356 memcpy(ensure(numChars), temp, numChars+1); 1357 free(temp); 1358 } 1359 1360 inline size_t len() const { 1361 return mUsed; 1362 } 1363 1364 private: 1365 char* ensure(int n) { 1366 size_t newUsed = mUsed + n; 1367 if (newUsed > mSize) { 1368 size_t newSize = mSize * 2 + 10; 1369 if (newSize < newUsed) { 1370 newSize = newUsed; 1371 } 1372 mpBase = (char*) realloc(mpBase, newSize + 1); 1373 mSize = newSize; 1374 } 1375 mpBase[newUsed] = '\0'; 1376 char* result = mpBase + mUsed; 1377 mUsed = newUsed; 1378 return result; 1379 } 1380 1381 char* mpBase; 1382 size_t mUsed; 1383 size_t mSize; 1384 }; 1385 1386 /** 1387 * Wrap an externally allocated string for use as a hash key. 1388 */ 1389 class FakeString : public String { 1390 public: 1391 FakeString(const char* string, size_t length) : 1392 String((char*) string, length, true) {} 1393 1394 ~FakeString() { 1395 orphan(); 1396 } 1397 }; 1398 1399 template<class V> class StringTable { 1400 public: 1401 StringTable() { 1402 init(10); 1403 } 1404 1405 StringTable(size_t initialCapacity) { 1406 init(initialCapacity); 1407 } 1408 1409 ~StringTable() { 1410 clear(); 1411 hashmapFree(mpMap); 1412 } 1413 1414 void clear() { 1415 hashmapForEach(mpMap, freeKeyValue, this); 1416 } 1417 1418 bool contains(String* pKey) { 1419 bool result = hashmapContainsKey(mpMap, pKey); 1420 return result; 1421 } 1422 1423 V* get(String* pKey) { 1424 V* result = (V*) hashmapGet(mpMap, pKey); 1425 return result; 1426 } 1427 1428 V* remove(String* pKey) { 1429 V* result = (V*) hashmapRemove(mpMap, pKey); 1430 return result; 1431 } 1432 1433 V* put(String* pKey, V* value) { 1434 V* result = (V*) hashmapPut(mpMap, pKey, value); 1435 if (result) { 1436 // The key was not adopted by the map, so delete it here. 1437 delete pKey; 1438 } 1439 return result; 1440 } 1441 1442 protected: 1443 1444 void init(size_t initialCapacity) { 1445 mpMap = hashmapCreate(initialCapacity, hashFn, equalsFn); 1446 } 1447 1448 static int hashFn(void* pKey) { 1449 String* pString = (String*) pKey; 1450 return hashmapHash(pString->getUnwrapped(), pString->len()); 1451 } 1452 1453 static bool equalsFn(void* keyA, void* keyB) { 1454 String* pStringA = (String*) keyA; 1455 String* pStringB = (String*) keyB; 1456 return pStringA->len() == pStringB->len() 1457 && strcmp(pStringA->getUnwrapped(), pStringB->getUnwrapped()) 1458 == 0; 1459 } 1460 1461 static bool freeKeyValue(void* key, void* value, void* context) { 1462 delete (String*) key; 1463 delete (V*) value; 1464 return true; 1465 } 1466 1467 Hashmap* mpMap; 1468 }; 1469 1470 class MacroTable : public StringTable<String> { 1471 public: 1472 MacroTable() : StringTable<String>(10) {} 1473 }; 1474 1475 class KeywordTable { 1476 public: 1477 1478 KeywordTable(){ 1479 mpMap = hashmapCreate(40, hashFn, equalsFn); 1480 put("int", TOK_INT); 1481 put("char", TOK_CHAR); 1482 put("void", TOK_VOID); 1483 put("if", TOK_IF); 1484 put("else", TOK_ELSE); 1485 put("while", TOK_WHILE); 1486 put("break", TOK_BREAK); 1487 put("return", TOK_RETURN); 1488 put("for", TOK_FOR); 1489 put("pragma", TOK_PRAGMA); 1490 } 1491 1492 ~KeywordTable() { 1493 hashmapFree(mpMap); 1494 } 1495 1496 int get(String* key) { 1497 return (int) hashmapGet(mpMap, key->getUnwrapped()); 1498 } 1499 1500 const char* lookupKeyFor(int value) { 1501 FindValContext context; 1502 context.key = 0; 1503 hashmapForEach(mpMap, findKeyFn, &context); 1504 return context.key; 1505 } 1506 1507 private: 1508 void put(const char* kw, int val) { 1509 hashmapPut(mpMap, (void*) kw, (void*) val); 1510 } 1511 1512 static int hashFn(void* pKey) { 1513 char* pString = (char*) pKey; 1514 return hashmapHash(pString, strlen(pString)); 1515 } 1516 1517 static bool equalsFn(void* keyA, void* keyB) { 1518 const char* pStringA = (const char*) keyA; 1519 const char* pStringB = (const char*) keyB; 1520 return strcmp(pStringA, pStringB) == 0; 1521 } 1522 1523 struct FindValContext { 1524 char* key; 1525 int value; 1526 }; 1527 1528 static bool findKeyFn(void* key, void* value, void* context) { 1529 FindValContext* pContext = (FindValContext*) context; 1530 if ((int) value == pContext->value) { 1531 pContext->key = (char*) key; 1532 return false; 1533 } 1534 return true; 1535 } 1536 1537 Hashmap* mpMap; 1538 }; 1539 1540 template<class E> class Array { 1541 public: 1542 Array() { 1543 mpBase = 0; 1544 mUsed = 0; 1545 mSize = 0; 1546 } 1547 1548 ~Array() { 1549 if (mpBase) { 1550 free(mpBase); 1551 } 1552 } 1553 1554 E get(int i) { 1555 if (i < 0 || i > (int) mUsed) { 1556 // error("internal error: Index out of range"); 1557 return E(); 1558 } 1559 return mpBase[i]; 1560 } 1561 1562 void set(int i, E val) { 1563 mpBase[i] = val; 1564 } 1565 1566 void pop() { 1567 if (mUsed > 0) { 1568 mUsed -= 1; 1569 } else { 1570 // error("internal error: Popped empty stack."); 1571 } 1572 } 1573 1574 void push(E item) { 1575 * ensure(1) = item; 1576 } 1577 1578 size_t len() { 1579 return mUsed; 1580 } 1581 1582 private: 1583 E* ensure(int n) { 1584 size_t newUsed = mUsed + n; 1585 if (newUsed > mSize) { 1586 size_t newSize = mSize * 2 + 10; 1587 if (newSize < newUsed) { 1588 newSize = newUsed; 1589 } 1590 mpBase = (E*) realloc(mpBase, sizeof(E) * newSize); 1591 mSize = newSize; 1592 } 1593 E* result = mpBase + mUsed; 1594 mUsed = newUsed; 1595 return result; 1596 } 1597 1598 E* mpBase; 1599 size_t mUsed; 1600 size_t mSize; 1601 }; 1602 1603 struct InputState { 1604 InputStream* pStream; 1605 int oldCh; 1606 }; 1607 1608 struct VariableInfo { 1609 VariableInfo() { 1610 pAddress = 0; 1611 pForward = 0; 1612 } 1613 void* pAddress; 1614 void* pForward; // For a forward direction, linked list of data to fix up 1615 }; 1616 1617 typedef StringTable<VariableInfo> SymbolTable; 1618 1619 class SymbolStack { 1620 public: 1621 SymbolStack() { 1622 mLevel = 0; 1623 addEntry(); 1624 } 1625 1626 void pushLevel() { 1627 mLevel++; 1628 } 1629 1630 void popLevel() { 1631 mLevel--; 1632 Entry e = mStack.get(mStack.len()-1); 1633 if (mLevel < e.level) { 1634 mStack.pop(); 1635 delete e.pTable; 1636 } 1637 } 1638 1639 VariableInfo* get(String* pName) { 1640 int len = mStack.len(); 1641 VariableInfo* v = NULL; 1642 int level = -1; 1643 for (int i = len - 1; i >= 0; i--) { 1644 Entry e = mStack.get(i); 1645 v = e.pTable->get(pName); 1646 if (v) { 1647 level = e.level; 1648 break; 1649 } 1650 } 1651#if 0 1652 fprintf(stderr, "Lookup %s %08x level %d\n", pName->getUnwrapped(), v, level); 1653 if (v) { 1654 fprintf(stderr, " %08x %08x\n", v->pAddress, v->pForward); 1655 } 1656#endif 1657 return v; 1658 } 1659 1660 VariableInfo* addLocal(String* pName) { 1661 int len = mStack.len(); 1662 if (mStack.get(len-1).level != mLevel) { 1663 addEntry(); 1664 len++; 1665 } 1666 return addImp(len-1, pName); 1667 } 1668 1669 VariableInfo* addGlobal(String* pName) { 1670 return addImp(0, pName); 1671 } 1672 1673 private: 1674 VariableInfo* addImp(int entryIndex, String* pName) { 1675 Entry e = mStack.get(entryIndex); 1676 SymbolTable* pTable = e.pTable; 1677 VariableInfo* v = new VariableInfo(); 1678 delete pTable->put(pName, v); 1679#if 0 1680 fprintf(stderr, "Add \"%s\" %08x level %d\n", pName->getUnwrapped(), v, e.level); 1681#endif 1682 return v; 1683 } 1684 1685 void addEntry() { 1686 Entry e; 1687 e.level = mLevel; 1688 e.pTable = new SymbolTable(); 1689 mStack.push(e); 1690 } 1691 1692 struct Entry { 1693 Entry() { 1694 level = 0; 1695 pTable = NULL; 1696 } 1697 int level; 1698 SymbolTable* pTable; 1699 }; 1700 1701 int mLevel; 1702 Array<Entry> mStack; 1703 }; 1704 1705 int ch; // Current input character, or EOF 1706 intptr_t tok; // token 1707 intptr_t tokc; // token extra info 1708 int tokl; // token operator level 1709 intptr_t rsym; // return symbol 1710 intptr_t loc; // local variable index 1711 char* glo; // global variable index 1712 String mTokenString; 1713 char* dptr; // Macro state: Points to macro text during macro playback. 1714 int dch; // Macro state: Saves old value of ch during a macro playback. 1715 char* pGlobalBase; 1716 KeywordTable mKeywords; 1717 SymbolStack mSymbolTable; 1718 InputStream* file; 1719 1720 CodeBuf codeBuf; 1721 CodeGenerator* pGen; 1722 1723 MacroTable mMacros; 1724 Array<InputState> mInputStateStack; 1725 1726 String mErrorBuf; 1727 1728 String mPragmas; 1729 int mPragmaStringCount; 1730 1731 static const int ALLOC_SIZE = 99999; 1732 1733 static const int TOK_DUMMY = 1; 1734 static const int TOK_NUM = 2; 1735 1736 // 3..255 are character and/or operators 1737 1738 // Keywords start at 0x100 and increase by 1 1739 static const int TOK_KEYWORD = 0x100; 1740 static const int TOK_INT = TOK_KEYWORD + 0; 1741 static const int TOK_CHAR = TOK_KEYWORD + 1; 1742 static const int TOK_VOID = TOK_KEYWORD + 2; 1743 static const int TOK_IF = TOK_KEYWORD + 3; 1744 static const int TOK_ELSE = TOK_KEYWORD + 4; 1745 static const int TOK_WHILE = TOK_KEYWORD + 5; 1746 static const int TOK_BREAK = TOK_KEYWORD + 6; 1747 static const int TOK_RETURN = TOK_KEYWORD + 7; 1748 static const int TOK_FOR = TOK_KEYWORD + 8; 1749 static const int TOK_PRAGMA = TOK_KEYWORD + 9; 1750 static const int TOK_DEFINE = TOK_KEYWORD + 10; 1751 1752 static const int TOK_UNDEFINED_SYMBOL = 0x200; 1753 1754 // Symbols start at 0x300, but are really pointers to VariableInfo structs. 1755 static const int TOK_SYMBOL = 0x300; 1756 1757 1758 static const int LOCAL = 0x200; 1759 1760 static const int SYM_FORWARD = 0; 1761 static const int SYM_DEFINE = 1; 1762 1763 /* tokens in string heap */ 1764 static const int TAG_TOK = ' '; 1765 1766 static const int OP_INCREMENT = 0; 1767 static const int OP_DECREMENT = 1; 1768 static const int OP_MUL = 2; 1769 static const int OP_DIV = 3; 1770 static const int OP_MOD = 4; 1771 static const int OP_PLUS = 5; 1772 static const int OP_MINUS = 6; 1773 static const int OP_SHIFT_LEFT = 7; 1774 static const int OP_SHIFT_RIGHT = 8; 1775 static const int OP_LESS_EQUAL = 9; 1776 static const int OP_GREATER_EQUAL = 10; 1777 static const int OP_LESS = 11; 1778 static const int OP_GREATER = 12; 1779 static const int OP_EQUALS = 13; 1780 static const int OP_NOT_EQUALS = 14; 1781 static const int OP_LOGICAL_AND = 15; 1782 static const int OP_LOGICAL_OR = 16; 1783 static const int OP_BIT_AND = 17; 1784 static const int OP_BIT_XOR = 18; 1785 static const int OP_BIT_OR = 19; 1786 static const int OP_BIT_NOT = 20; 1787 static const int OP_LOGICAL_NOT = 21; 1788 static const int OP_COUNT = 22; 1789 1790 /* Operators are searched from front, the two-character operators appear 1791 * before the single-character operators with the same first character. 1792 * @ is used to pad out single-character operators. 1793 */ 1794 static const char* operatorChars; 1795 static const char operatorLevel[]; 1796 1797 void pdef(int t) { 1798 mTokenString.append(t); 1799 } 1800 1801 void inp() { 1802 if (dptr) { 1803 ch = *dptr++; 1804 if (ch == 0) { 1805 dptr = 0; 1806 ch = dch; 1807 } 1808 } else 1809 ch = file->getChar(); 1810#if 0 1811 printf("ch='%c' 0x%x\n", ch, ch); 1812#endif 1813 } 1814 1815 int isid() { 1816 return isalnum(ch) | (ch == '_'); 1817 } 1818 1819 /* read a character constant */ 1820 void getq() { 1821 if (ch == '\\') { 1822 inp(); 1823 if (ch == 'n') 1824 ch = '\n'; 1825 } 1826 } 1827 1828 void next() { 1829 int l, a; 1830 1831 while (isspace(ch) | (ch == '#')) { 1832 if (ch == '#') { 1833 inp(); 1834 next(); 1835 if (tok == TOK_DEFINE) { 1836 doDefine(); 1837 } else if (tok == TOK_PRAGMA) { 1838 doPragma(); 1839 } else { 1840 error("Unsupported preprocessor directive \"%s\"", 1841 mTokenString.getUnwrapped()); 1842 } 1843 } 1844 inp(); 1845 } 1846 tokl = 0; 1847 tok = ch; 1848 /* encode identifiers & numbers */ 1849 if (isid()) { 1850 mTokenString.clear(); 1851 while (isid()) { 1852 pdef(ch); 1853 inp(); 1854 } 1855 if (isdigit(tok)) { 1856 tokc = strtol(mTokenString.getUnwrapped(), 0, 0); 1857 tok = TOK_NUM; 1858 } else { 1859 // Is this a macro? 1860 String* pValue = mMacros.get(&mTokenString); 1861 if (pValue) { 1862 // Yes, it is a macro 1863 dptr = pValue->getUnwrapped(); 1864 dch = ch; 1865 inp(); 1866 next(); 1867 } else { 1868 // Is this a keyword? 1869 int kwtok = mKeywords.get(&mTokenString); 1870 if (kwtok) { 1871 tok = kwtok; 1872 // fprintf(stderr, "tok= keyword %s %x\n", last_id, tok); 1873 } else { 1874 tok = (intptr_t) mSymbolTable.get(&mTokenString); 1875 if (!tok) { 1876 tok = TOK_UNDEFINED_SYMBOL; 1877 } 1878 // fprintf(stderr, "tok= symbol %s %x\n", last_id, tok); 1879 } 1880 } 1881 } 1882 } else { 1883 inp(); 1884 if (tok == '\'') { 1885 tok = TOK_NUM; 1886 getq(); 1887 tokc = ch; 1888 inp(); 1889 inp(); 1890 } else if ((tok == '/') & (ch == '*')) { 1891 inp(); 1892 while (ch) { 1893 while (ch != '*') 1894 inp(); 1895 inp(); 1896 if (ch == '/') 1897 ch = 0; 1898 } 1899 inp(); 1900 next(); 1901 } else if ((tok == '/') & (ch == '/')) { 1902 inp(); 1903 while (ch && (ch != '\n')) { 1904 inp(); 1905 } 1906 inp(); 1907 next(); 1908 } else { 1909 const char* t = operatorChars; 1910 int opIndex = 0; 1911 while ((l = *t++) != 0) { 1912 a = *t++; 1913 tokl = operatorLevel[opIndex]; 1914 tokc = opIndex; 1915 if ((l == tok) & ((a == ch) | (a == '@'))) { 1916#if 0 1917 printf("%c%c -> tokl=%d tokc=0x%x\n", 1918 l, a, tokl, tokc); 1919#endif 1920 if (a == ch) { 1921 inp(); 1922 tok = TOK_DUMMY; /* dummy token for double tokens */ 1923 } 1924 break; 1925 } 1926 opIndex++; 1927 } 1928 if (l == 0) { 1929 tokl = 0; 1930 tokc = 0; 1931 } 1932 } 1933 } 1934#if 0 1935 { 1936 const char* p; 1937 1938 printf("tok=0x%x ", tok); 1939 if (tok >= TOK_KEYWORD) { 1940 printf("'"); 1941 if (tok>= TOK_SYMBOL) 1942 p = sym_stk + 1 + ((char*) tok - (char*) pVarsBase) / 8; 1943 else { 1944 p = mKeywords.lookupKeyFor(tok); 1945 if (!p) { 1946 p = "unknown keyword"; 1947 } 1948 } 1949 while (*p != TAG_TOK && *p) 1950 printf("%c", *p++); 1951 printf("'\n"); 1952 } else if (tok == TOK_NUM) { 1953 printf("%d\n", tokc); 1954 } else { 1955 printf("'%c'\n", tok); 1956 } 1957 } 1958#endif 1959 } 1960 1961 void doDefine() { 1962 String* pName = new String(); 1963 while (isspace(ch)) { 1964 inp(); 1965 } 1966 while (isid()) { 1967 pName->append(ch); 1968 inp(); 1969 } 1970 if (ch == '(') { 1971 delete pName; 1972 error("Defines with arguments not supported"); 1973 return; 1974 } 1975 while (isspace(ch)) { 1976 inp(); 1977 } 1978 String* pValue = new String(); 1979 while (ch != '\n' && ch != EOF) { 1980 pValue->append(ch); 1981 inp(); 1982 } 1983 delete mMacros.put(pName, pValue); 1984 } 1985 1986 void doPragma() { 1987 // # pragma name(val) 1988 int state = 0; 1989 while(ch != EOF && ch != '\n' && state < 10) { 1990 switch(state) { 1991 case 0: 1992 if (isspace(ch)) { 1993 inp(); 1994 } else { 1995 state++; 1996 } 1997 break; 1998 case 1: 1999 if (isalnum(ch)) { 2000 mPragmas.append(ch); 2001 inp(); 2002 } else if (ch == '(') { 2003 mPragmas.append(0); 2004 inp(); 2005 state++; 2006 } else { 2007 state = 11; 2008 } 2009 break; 2010 case 2: 2011 if (isalnum(ch)) { 2012 mPragmas.append(ch); 2013 inp(); 2014 } else if (ch == ')') { 2015 mPragmas.append(0); 2016 inp(); 2017 state = 10; 2018 } else { 2019 state = 11; 2020 } 2021 break; 2022 } 2023 } 2024 if(state != 10) { 2025 error("Unexpected pragma syntax"); 2026 } 2027 mPragmaStringCount += 2; 2028 } 2029 2030 virtual void verror(const char* fmt, va_list ap) { 2031 mErrorBuf.printf("%ld: ", file->getLine()); 2032 mErrorBuf.vprintf(fmt, ap); 2033 mErrorBuf.printf("\n"); 2034 } 2035 2036 void skip(intptr_t c) { 2037 if (tok != c) { 2038 error("'%c' expected", c); 2039 } 2040 next(); 2041 } 2042 2043 /* l is one if '=' parsing wanted (quick hack) */ 2044 void unary(intptr_t l) { 2045 intptr_t n, t, a; 2046 int c; 2047 t = 0; 2048 n = 1; /* type of expression 0 = forward, 1 = value, other = 2049 lvalue */ 2050 if (tok == '\"') { 2051 pGen->li((int) glo); 2052 while (ch != '\"') { 2053 getq(); 2054 *allocGlobalSpace(1) = ch; 2055 inp(); 2056 } 2057 *glo = 0; 2058 /* align heap */ 2059 allocGlobalSpace((char*) (((intptr_t) glo + 4) & -4) - glo); 2060 inp(); 2061 next(); 2062 } else { 2063 c = tokl; 2064 a = tokc; 2065 t = tok; 2066 next(); 2067 if (t == TOK_NUM) { 2068 pGen->li(a); 2069 } else if (c == 2) { 2070 /* -, +, !, ~ */ 2071 unary(0); 2072 pGen->clearR1(); 2073 if (t == '!') 2074 pGen->gcmp(a); 2075 else 2076 pGen->genOp(a); 2077 } else if (t == '(') { 2078 expr(); 2079 skip(')'); 2080 } else if (t == '*') { 2081 /* parse cast */ 2082 skip('('); 2083 t = tok; /* get type */ 2084 next(); /* skip int/char/void */ 2085 next(); /* skip '*' or '(' */ 2086 if (tok == '*') { 2087 /* function type */ 2088 skip('*'); 2089 skip(')'); 2090 skip('('); 2091 skip(')'); 2092 t = 0; 2093 } 2094 skip(')'); 2095 unary(0); 2096 if (tok == '=') { 2097 next(); 2098 pGen->pushR0(); 2099 expr(); 2100 pGen->popR1(); 2101 pGen->storeR0ToR1(t == TOK_INT); 2102 } else if (t) { 2103 pGen->loadR0FromR0(t == TOK_INT); 2104 } 2105 } else if (t == '&') { 2106 pGen->leaR0(*(int *) tok); 2107 next(); 2108 } else if (t == EOF ) { 2109 error("Unexpected EOF."); 2110 } else if (t < TOK_UNDEFINED_SYMBOL) { 2111 error("Unexpected symbol or keyword"); 2112 } else { 2113 if (t == TOK_UNDEFINED_SYMBOL) { 2114 t = (intptr_t) mSymbolTable.addGlobal( 2115 new String(mTokenString)); 2116 } 2117 2118 n = *(int *) t; 2119 /* forward reference: try dlsym */ 2120 if (!n) { 2121 n = (intptr_t) dlsym(RTLD_DEFAULT, 2122 mTokenString.getUnwrapped()); 2123 } 2124 if ((tok == '=') & l) { 2125 /* assignment */ 2126 next(); 2127 expr(); 2128 pGen->storeR0(n); 2129 } else if (tok != '(') { 2130 /* variable */ 2131 pGen->loadR0(n, tokl == 11, tokc); 2132 if (tokl == 11) { 2133 next(); 2134 } 2135 } 2136 } 2137 } 2138 2139 /* function call */ 2140 if (tok == '(') { 2141 if (n == 1) 2142 pGen->pushR0(); 2143 2144 /* push args and invert order */ 2145 a = pGen->beginFunctionCallArguments(); 2146 next(); 2147 l = 0; 2148 while (tok != ')') { 2149 expr(); 2150 pGen->storeR0ToArg(l); 2151 if (tok == ',') 2152 next(); 2153 l = l + 4; 2154 } 2155 pGen->endFunctionCallArguments(a, l); 2156 next(); 2157 if (!n) { 2158 /* forward reference */ 2159 t = t + 4; 2160 *(int *) t = pGen->callForward(*(int *) t); 2161 } else if (n == 1) { 2162 pGen->callIndirect(l); 2163 } else { 2164 pGen->callRelative(n - codeBuf.getPC() - pGen->jumpOffset()); 2165 } 2166 if (l | (n == 1)) 2167 pGen->adjustStackAfterCall(l, n == 1); 2168 } 2169 } 2170 2171 void sum(int l) { 2172 intptr_t t, n, a; 2173 t = 0; 2174 if (l-- == 1) 2175 unary(1); 2176 else { 2177 sum(l); 2178 a = 0; 2179 while (l == tokl) { 2180 n = tok; 2181 t = tokc; 2182 next(); 2183 2184 if (l > 8) { 2185 a = pGen->gtst(t == OP_LOGICAL_OR, a); /* && and || output code generation */ 2186 sum(l); 2187 } else { 2188 pGen->pushR0(); 2189 sum(l); 2190 pGen->popR1(); 2191 2192 if ((l == 4) | (l == 5)) { 2193 pGen->gcmp(t); 2194 } else { 2195 pGen->genOp(t); 2196 } 2197 } 2198 } 2199 /* && and || output code generation */ 2200 if (a && l > 8) { 2201 a = pGen->gtst(t == OP_LOGICAL_OR, a); 2202 pGen->li(t != OP_LOGICAL_OR); 2203 pGen->gjmp(5); /* jmp $ + 5 (sizeof li, FIXME for ARM) */ 2204 pGen->gsym(a); 2205 pGen->li(t == OP_LOGICAL_OR); 2206 } 2207 } 2208 } 2209 2210 void expr() { 2211 sum(11); 2212 } 2213 2214 int test_expr() { 2215 expr(); 2216 return pGen->gtst(0, 0); 2217 } 2218 2219 void block(intptr_t l) { 2220 intptr_t a, n, t; 2221 2222 if (tok == TOK_IF) { 2223 next(); 2224 skip('('); 2225 a = test_expr(); 2226 skip(')'); 2227 block(l); 2228 if (tok == TOK_ELSE) { 2229 next(); 2230 n = pGen->gjmp(0); /* jmp */ 2231 pGen->gsym(a); 2232 block(l); 2233 pGen->gsym(n); /* patch else jmp */ 2234 } else { 2235 pGen->gsym(a); /* patch if test */ 2236 } 2237 } else if ((tok == TOK_WHILE) | (tok == TOK_FOR)) { 2238 t = tok; 2239 next(); 2240 skip('('); 2241 if (t == TOK_WHILE) { 2242 n = codeBuf.getPC(); // top of loop, target of "next" iteration 2243 a = test_expr(); 2244 } else { 2245 if (tok != ';') 2246 expr(); 2247 skip(';'); 2248 n = codeBuf.getPC(); 2249 a = 0; 2250 if (tok != ';') 2251 a = test_expr(); 2252 skip(';'); 2253 if (tok != ')') { 2254 t = pGen->gjmp(0); 2255 expr(); 2256 pGen->gjmp(n - codeBuf.getPC() - pGen->jumpOffset()); 2257 pGen->gsym(t); 2258 n = t + 4; 2259 } 2260 } 2261 skip(')'); 2262 block((intptr_t) &a); 2263 pGen->gjmp(n - codeBuf.getPC() - pGen->jumpOffset()); /* jmp */ 2264 pGen->gsym(a); 2265 } else if (tok == '{') { 2266 mSymbolTable.pushLevel(); 2267 next(); 2268 /* declarations */ 2269 localDeclarations(); 2270 while (tok != '}' && tok != EOF) 2271 block(l); 2272 skip('}'); 2273 mSymbolTable.popLevel(); 2274 } else { 2275 if (tok == TOK_RETURN) { 2276 next(); 2277 if (tok != ';') 2278 expr(); 2279 rsym = pGen->gjmp(rsym); /* jmp */ 2280 } else if (tok == TOK_BREAK) { 2281 next(); 2282 *(int *) l = pGen->gjmp(*(int *) l); 2283 } else if (tok != ';') 2284 expr(); 2285 skip(';'); 2286 } 2287 } 2288 2289 typedef int Type; 2290 static const Type TY_UNKNOWN = 0; 2291 static const Type TY_INT = 1; 2292 static const Type TY_CHAR = 2; 2293 static const Type TY_VOID = 3; 2294 static const int TY_BASE_TYPE_MASK = 0xf; 2295 static const int TY_INDIRECTION_MASK = 0xf0; 2296 static const int TY_INDIRECTION_SHIFT = 4; 2297 static const int MAX_INDIRECTION_COUNT = 15; 2298 2299 Type getBaseType(Type t) { 2300 return t & TY_BASE_TYPE_MASK; 2301 } 2302 2303 int getIndirectionCount(Type t) { 2304 return (TY_INDIRECTION_MASK & t) >> TY_INDIRECTION_SHIFT; 2305 } 2306 2307 void setIndirectionCount(Type& t, int count) { 2308 t = ((TY_INDIRECTION_MASK & (count << TY_INDIRECTION_SHIFT)) 2309 | (t & ~TY_INDIRECTION_MASK)); 2310 } 2311 2312 bool acceptType(Type& t) { 2313 t = TY_UNKNOWN; 2314 if (tok == TOK_INT) { 2315 t = TY_INT; 2316 } else if (tok == TOK_CHAR) { 2317 t = TY_CHAR; 2318 } else if (tok == TOK_VOID) { 2319 t = TY_VOID; 2320 } else { 2321 return false; 2322 } 2323 next(); 2324 return true; 2325 } 2326 2327 Type acceptPointerDeclaration(Type& base) { 2328 Type t = base; 2329 int indirectionCount = 0; 2330 while (tok == '*' && indirectionCount <= MAX_INDIRECTION_COUNT) { 2331 next(); 2332 indirectionCount++; 2333 } 2334 if (indirectionCount > MAX_INDIRECTION_COUNT) { 2335 error("Too many levels of pointer. Max %d", MAX_INDIRECTION_COUNT); 2336 } 2337 setIndirectionCount(t, indirectionCount); 2338 return t; 2339 } 2340 2341 void expectType(Type& t) { 2342 if (!acceptType(t)) { 2343 error("Expected a type."); 2344 } 2345 } 2346 2347 void checkSymbol() { 2348 if (tok < TOK_SYMBOL) { 2349 error("Expected a symbol"); 2350 } 2351 } 2352 2353 void defineGlobalSymbol() { 2354 if (tok == TOK_UNDEFINED_SYMBOL) { 2355 // TODO: don't allow multiple definitions at same level. 2356 tok = (intptr_t) mSymbolTable.addGlobal( 2357 new String(mTokenString)); 2358 } 2359 } 2360 2361 void defineLocalSymbol() { 2362 // TODO: don't allow multiple definitions at same level. 2363 tok = (intptr_t) mSymbolTable.addLocal( 2364 new String(mTokenString)); 2365 } 2366 2367 void localDeclarations() { 2368 intptr_t a; 2369 Type base; 2370 2371 while (acceptType(base)) { 2372 while (tok != ';') { 2373 Type t = acceptPointerDeclaration(t); 2374 defineLocalSymbol(); 2375 loc = loc + 4; 2376 *(int *) tok = -loc; 2377 2378 next(); 2379 if (tok == ',') 2380 next(); 2381 } 2382 skip(';'); 2383 } 2384 } 2385 2386 void globalDeclarations() { 2387 while (tok != EOF) { 2388 Type base; 2389 expectType(base); 2390 Type t = acceptPointerDeclaration(t); 2391 defineGlobalSymbol(); 2392 VariableInfo* name = (VariableInfo*) tok; 2393 next(); 2394 if (tok == ',' || tok == ';') { 2395 // it's a variable declaration 2396 for(;;) { 2397 name->pAddress = (int*) allocGlobalSpace(4); 2398 if (tok != ',') { 2399 break; 2400 } 2401 skip(','); 2402 t = acceptPointerDeclaration(t); 2403 defineGlobalSymbol(); 2404 name = (VariableInfo*) tok; 2405 next(); 2406 } 2407 skip(';'); 2408 } else { 2409 /* patch forward references (XXX: does not work for function 2410 pointers) */ 2411 pGen->gsym((int) name->pForward); 2412 /* put function address */ 2413 name->pAddress = (void*) codeBuf.getPC(); 2414 skip('('); 2415 mSymbolTable.pushLevel(); 2416 intptr_t a = 8; 2417 int argCount = 0; 2418 while (tok != ')' && tok != EOF) { 2419 Type aType; 2420 expectType(aType); 2421 aType = acceptPointerDeclaration(aType); 2422 defineLocalSymbol(); 2423 /* read param name and compute offset */ 2424 *(int *) tok = a; 2425 a = a + 4; 2426 next(); 2427 if (tok == ',') 2428 next(); 2429 argCount++; 2430 } 2431 skip(')'); 2432 rsym = loc = 0; 2433 a = pGen->functionEntry(argCount); 2434 block(0); 2435 pGen->gsym(rsym); 2436 pGen->functionExit(argCount, a, loc); 2437 mSymbolTable.popLevel(); 2438 } 2439 } 2440 } 2441 2442 char* allocGlobalSpace(int bytes) { 2443 if (glo - pGlobalBase + bytes > ALLOC_SIZE) { 2444 error("Global space exhausted"); 2445 return NULL; 2446 } 2447 char* result = glo; 2448 glo += bytes; 2449 return result; 2450 } 2451 2452 void cleanup() { 2453 if (pGlobalBase != 0) { 2454 free(pGlobalBase); 2455 pGlobalBase = 0; 2456 } 2457 if (pGen) { 2458 delete pGen; 2459 pGen = 0; 2460 } 2461 if (file) { 2462 delete file; 2463 file = 0; 2464 } 2465 } 2466 2467 void clear() { 2468 tok = 0; 2469 tokc = 0; 2470 tokl = 0; 2471 ch = 0; 2472 rsym = 0; 2473 loc = 0; 2474 glo = 0; 2475 dptr = 0; 2476 dch = 0; 2477 file = 0; 2478 pGlobalBase = 0; 2479 pGen = 0; 2480 mPragmaStringCount = 0; 2481 } 2482 2483 void setArchitecture(const char* architecture) { 2484 delete pGen; 2485 pGen = 0; 2486 2487 if (architecture != NULL) { 2488#ifdef PROVIDE_ARM_CODEGEN 2489 if (! pGen && strcmp(architecture, "arm") == 0) { 2490 pGen = new ARMCodeGenerator(); 2491 } 2492#endif 2493#ifdef PROVIDE_X86_CODEGEN 2494 if (! pGen && strcmp(architecture, "x86") == 0) { 2495 pGen = new X86CodeGenerator(); 2496 } 2497#endif 2498 if (!pGen ) { 2499 error("Unknown architecture %s\n", architecture); 2500 } 2501 } 2502 2503 if (pGen == NULL) { 2504#if defined(DEFAULT_ARM_CODEGEN) 2505 pGen = new ARMCodeGenerator(); 2506#elif defined(DEFAULT_X86_CODEGEN) 2507 pGen = new X86CodeGenerator(); 2508#endif 2509 } 2510 if (pGen == NULL) { 2511 error("No code generator defined."); 2512 } else { 2513 pGen->setErrorSink(this); 2514 } 2515 } 2516 2517public: 2518 struct args { 2519 args() { 2520 architecture = 0; 2521 } 2522 const char* architecture; 2523 }; 2524 2525 Compiler() { 2526 clear(); 2527 } 2528 2529 ~Compiler() { 2530 cleanup(); 2531 } 2532 2533 int compile(const char* text, size_t textLength) { 2534 int result; 2535 2536 cleanup(); 2537 clear(); 2538 codeBuf.init(ALLOC_SIZE); 2539 setArchitecture(NULL); 2540 if (!pGen) { 2541 return -1; 2542 } 2543#ifdef PROVIDE_TRACE_CODEGEN 2544 pGen = new TraceCodeGenerator(pGen); 2545#endif 2546 pGen->setErrorSink(this); 2547 pGen->init(&codeBuf); 2548 file = new TextInputStream(text, textLength); 2549 pGlobalBase = (char*) calloc(1, ALLOC_SIZE); 2550 glo = pGlobalBase; 2551 inp(); 2552 next(); 2553 globalDeclarations(); 2554 result = pGen->finishCompile(); 2555 if (result == 0) { 2556 if (mErrorBuf.len()) { 2557 result = -2; 2558 } 2559 } 2560 return result; 2561 } 2562 2563 int dump(FILE* out) { 2564 fwrite(codeBuf.getBase(), 1, codeBuf.getSize(), out); 2565 return 0; 2566 } 2567 2568 int disassemble(FILE* out) { 2569 return pGen->disassemble(out); 2570 } 2571 2572 /* Look through the symbol table to find a symbol. 2573 * If found, return its value. 2574 */ 2575 void* lookup(const char* name) { 2576 String string(name, -1, false); 2577 VariableInfo* pVariableInfo = mSymbolTable.get(&string); 2578 if (pVariableInfo) { 2579 return pVariableInfo->pAddress; 2580 } 2581 return NULL; 2582 } 2583 2584 void getPragmas(ACCsizei* actualStringCount, 2585 ACCsizei maxStringCount, ACCchar** strings) { 2586 int stringCount = mPragmaStringCount; 2587 if (actualStringCount) { 2588 *actualStringCount = stringCount; 2589 } 2590 if (stringCount > maxStringCount) { 2591 stringCount = maxStringCount; 2592 } 2593 if (strings) { 2594 char* pPragmas = mPragmas.getUnwrapped(); 2595 while (stringCount-- > 0) { 2596 *strings++ = pPragmas; 2597 pPragmas += strlen(pPragmas) + 1; 2598 } 2599 } 2600 } 2601 2602 char* getErrorMessage() { 2603 return mErrorBuf.getUnwrapped(); 2604 } 2605 2606}; 2607 2608const char* Compiler::operatorChars = 2609 "++--*@/@%@+@-@<<>><=>=<@>@==!=&&||&@^@|@~@!@"; 2610 2611const char Compiler::operatorLevel[] = 2612 {11, 11, 1, 1, 1, 2, 2, 3, 3, 4, 4, 4, 4, 2613 5, 5, /* ==, != */ 2614 9, 10, /* &&, || */ 2615 6, 7, 8, /* & ^ | */ 2616 2, 2 /* ~ ! */ 2617 }; 2618 2619#ifdef PROVIDE_ARM_CODEGEN 2620FILE* Compiler::ARMCodeGenerator::disasmOut; 2621#endif 2622 2623#ifdef PROVIDE_X86_CODEGEN 2624const int Compiler::X86CodeGenerator::operatorHelper[] = { 2625 0x1, // ++ 2626 0xff, // -- 2627 0xc1af0f, // * 2628 0xf9f79991, // / 2629 0xf9f79991, // % (With manual assist to swap results) 2630 0xc801, // + 2631 0xd8f7c829, // - 2632 0xe0d391, // << 2633 0xf8d391, // >> 2634 0xe, // <= 2635 0xd, // >= 2636 0xc, // < 2637 0xf, // > 2638 0x4, // == 2639 0x5, // != 2640 0x0, // && 2641 0x1, // || 2642 0xc821, // & 2643 0xc831, // ^ 2644 0xc809, // | 2645 0xd0f7, // ~ 2646 0x4 // ! 2647}; 2648#endif 2649 2650struct ACCscript { 2651 ACCscript() { 2652 text = 0; 2653 textLength = 0; 2654 accError = ACC_NO_ERROR; 2655 } 2656 2657 ~ACCscript() { 2658 delete text; 2659 } 2660 2661 void setError(ACCenum error) { 2662 if (accError == ACC_NO_ERROR && error != ACC_NO_ERROR) { 2663 accError = error; 2664 } 2665 } 2666 2667 ACCenum getError() { 2668 ACCenum result = accError; 2669 accError = ACC_NO_ERROR; 2670 return result; 2671 } 2672 2673 Compiler compiler; 2674 char* text; 2675 int textLength; 2676 ACCenum accError; 2677}; 2678 2679 2680extern "C" 2681ACCscript* accCreateScript() { 2682 return new ACCscript(); 2683} 2684 2685extern "C" 2686ACCenum accGetError( ACCscript* script ) { 2687 return script->getError(); 2688} 2689 2690extern "C" 2691void accDeleteScript(ACCscript* script) { 2692 delete script; 2693} 2694 2695extern "C" 2696void accScriptSource(ACCscript* script, 2697 ACCsizei count, 2698 const ACCchar ** string, 2699 const ACCint * length) { 2700 int totalLength = 0; 2701 for(int i = 0; i < count; i++) { 2702 int len = -1; 2703 const ACCchar* s = string[i]; 2704 if (length) { 2705 len = length[i]; 2706 } 2707 if (len < 0) { 2708 len = strlen(s); 2709 } 2710 totalLength += len; 2711 } 2712 delete script->text; 2713 char* text = new char[totalLength + 1]; 2714 script->text = text; 2715 script->textLength = totalLength; 2716 char* dest = text; 2717 for(int i = 0; i < count; i++) { 2718 int len = -1; 2719 const ACCchar* s = string[i]; 2720 if (length) { 2721 len = length[i]; 2722 } 2723 if (len < 0) { 2724 len = strlen(s); 2725 } 2726 memcpy(dest, s, len); 2727 dest += len; 2728 } 2729 text[totalLength] = '\0'; 2730} 2731 2732extern "C" 2733void accCompileScript(ACCscript* script) { 2734 int result = script->compiler.compile(script->text, script->textLength); 2735 if (result) { 2736 script->setError(ACC_INVALID_OPERATION); 2737 } 2738} 2739 2740extern "C" 2741void accGetScriptiv(ACCscript* script, 2742 ACCenum pname, 2743 ACCint * params) { 2744 switch (pname) { 2745 case ACC_INFO_LOG_LENGTH: 2746 *params = 0; 2747 break; 2748 } 2749} 2750 2751extern "C" 2752void accGetScriptInfoLog(ACCscript* script, 2753 ACCsizei maxLength, 2754 ACCsizei * length, 2755 ACCchar * infoLog) { 2756 char* message = script->compiler.getErrorMessage(); 2757 int messageLength = strlen(message) + 1; 2758 if (length) { 2759 *length = messageLength; 2760 } 2761 if (infoLog && maxLength > 0) { 2762 int trimmedLength = maxLength < messageLength ? 2763 maxLength : messageLength; 2764 memcpy(infoLog, message, trimmedLength); 2765 infoLog[trimmedLength] = 0; 2766 } 2767} 2768 2769extern "C" 2770void accGetScriptLabel(ACCscript* script, const ACCchar * name, 2771 ACCvoid ** address) { 2772 void* value = script->compiler.lookup(name); 2773 if (value) { 2774 *address = value; 2775 } else { 2776 script->setError(ACC_INVALID_VALUE); 2777 } 2778} 2779 2780extern "C" 2781void accGetPragmas(ACCscript* script, ACCsizei* actualStringCount, 2782 ACCsizei maxStringCount, ACCchar** strings){ 2783 script->compiler.getPragmas(actualStringCount, maxStringCount, strings); 2784} 2785 2786 2787} // namespace acc 2788 2789