acc.cpp revision 8df4619e09b8944c1aa3b743b556a63a283d8768
1/* 2 * Android "Almost" C Compiler. 3 * This is a compiler for a small subset of the C language, intended for use 4 * in scripting environments where speed and memory footprint are important. 5 * 6 * This code is based upon the "unobfuscated" version of the 7 * Obfuscated Tiny C compiler, see the file LICENSE for details. 8 * 9 */ 10 11#include <ctype.h> 12#include <dlfcn.h> 13#include <errno.h> 14#include <stdarg.h> 15#include <stdint.h> 16#include <stdio.h> 17#include <stdlib.h> 18#include <string.h> 19#include <cutils/hashmap.h> 20 21#if defined(__i386__) 22#include <sys/mman.h> 23#endif 24 25#if defined(__arm__) 26#include <unistd.h> 27#endif 28 29#if defined(__arm__) 30#define DEFAULT_ARM_CODEGEN 31#define PROVIDE_ARM_CODEGEN 32#elif defined(__i386__) 33#define DEFAULT_X86_CODEGEN 34#define PROVIDE_X86_CODEGEN 35#elif defined(__x86_64__) 36#define DEFAULT_X64_CODEGEN 37#define PROVIDE_X64_CODEGEN 38#endif 39 40 41#ifdef PROVIDE_ARM_CODEGEN 42#include "disassem.h" 43#endif 44 45#include <acc/acc.h> 46 47#define LOG_API(...) do {} while(0) 48// #define LOG_API(...) fprintf (stderr, __VA_ARGS__) 49 50#define LOG_STACK(...) do {} while(0) 51// #define LOG_STACK(...) fprintf (stderr, __VA_ARGS__) 52 53// #define ENABLE_ARM_DISASSEMBLY 54// #define PROVIDE_TRACE_CODEGEN 55 56namespace acc { 57 58// Subset of STL vector. 59template<class E> class Vector { 60 public: 61 Vector() { 62 mpBase = 0; 63 mUsed = 0; 64 mSize = 0; 65 } 66 67 ~Vector() { 68 if (mpBase) { 69 for(size_t i = 0; i < mUsed; i++) { 70 mpBase[mUsed].~E(); 71 } 72 free(mpBase); 73 } 74 } 75 76 inline E& operator[](size_t i) { 77 return mpBase[i]; 78 } 79 80 inline E& front() { 81 return mpBase[0]; 82 } 83 84 inline E& back() { 85 return mpBase[mUsed - 1]; 86 } 87 88 void pop_back() { 89 mUsed -= 1; 90 mpBase[mUsed].~E(); 91 } 92 93 void push_back(const E& item) { 94 * ensure(1) = item; 95 } 96 97 size_t size() { 98 return mUsed; 99 } 100 101private: 102 E* ensure(int n) { 103 size_t newUsed = mUsed + n; 104 if (newUsed > mSize) { 105 size_t newSize = mSize * 2 + 10; 106 if (newSize < newUsed) { 107 newSize = newUsed; 108 } 109 mpBase = (E*) realloc(mpBase, sizeof(E) * newSize); 110 mSize = newSize; 111 } 112 E* result = mpBase + mUsed; 113 mUsed = newUsed; 114 return result; 115 } 116 117 E* mpBase; 118 size_t mUsed; 119 size_t mSize; 120}; 121 122class ErrorSink { 123public: 124 void error(const char *fmt, ...) { 125 va_list ap; 126 va_start(ap, fmt); 127 verror(fmt, ap); 128 va_end(ap); 129 } 130 131 virtual void verror(const char* fmt, va_list ap) = 0; 132}; 133 134class Compiler : public ErrorSink { 135 typedef int tokenid_t; 136 enum TypeTag { 137 TY_INT, TY_CHAR, TY_VOID, TY_FLOAT, TY_DOUBLE, 138 TY_POINTER, TY_FUNC, TY_PARAM 139 }; 140 141 struct Type { 142 TypeTag tag; 143 tokenid_t id; // For function arguments 144 Type* pHead; 145 Type* pTail; 146 }; 147 148 class CodeBuf { 149 char* ind; // Output code pointer 150 char* pProgramBase; 151 ErrorSink* mErrorSink; 152 int mSize; 153 bool mOverflowed; 154 155 void release() { 156 if (pProgramBase != 0) { 157 free(pProgramBase); 158 pProgramBase = 0; 159 } 160 } 161 162 bool check(int n) { 163 int newSize = ind - pProgramBase + n; 164 bool overflow = newSize > mSize; 165 if (overflow && !mOverflowed) { 166 mOverflowed = true; 167 if (mErrorSink) { 168 mErrorSink->error("Code too large: %d bytes", newSize); 169 } 170 } 171 return overflow; 172 } 173 174 public: 175 CodeBuf() { 176 pProgramBase = 0; 177 ind = 0; 178 mErrorSink = 0; 179 mSize = 0; 180 mOverflowed = false; 181 } 182 183 ~CodeBuf() { 184 release(); 185 } 186 187 void init(int size) { 188 release(); 189 mSize = size; 190 pProgramBase = (char*) calloc(1, size); 191 ind = pProgramBase; 192 } 193 194 void setErrorSink(ErrorSink* pErrorSink) { 195 mErrorSink = pErrorSink; 196 } 197 198 int o4(int n) { 199 if(check(4)) { 200 return 0; 201 } 202 intptr_t result = (intptr_t) ind; 203 * (int*) ind = n; 204 ind += 4; 205 return result; 206 } 207 208 /* 209 * Output a byte. Handles all values, 0..ff. 210 */ 211 void ob(int n) { 212 if(check(1)) { 213 return; 214 } 215 *ind++ = n; 216 } 217 218 inline void* getBase() { 219 return (void*) pProgramBase; 220 } 221 222 intptr_t getSize() { 223 return ind - pProgramBase; 224 } 225 226 intptr_t getPC() { 227 return (intptr_t) ind; 228 } 229 }; 230 231 /** 232 * A code generator creates an in-memory program, generating the code on 233 * the fly. There is one code generator implementation for each supported 234 * architecture. 235 * 236 * The code generator implements the following abstract machine: 237 * R0 - the accumulator. 238 * FP - a frame pointer for accessing function arguments and local 239 * variables. 240 * SP - a stack pointer for storing intermediate results while evaluating 241 * expressions. The stack pointer grows downwards. 242 * 243 * The function calling convention is that all arguments are placed on the 244 * stack such that the first argument has the lowest address. 245 * After the call, the result is in R0. The caller is responsible for 246 * removing the arguments from the stack. 247 * The R0 register is not saved across function calls. The 248 * FP and SP registers are saved. 249 */ 250 251 class CodeGenerator { 252 public: 253 CodeGenerator() { 254 mErrorSink = 0; 255 pCodeBuf = 0; 256 pushType(); 257 } 258 virtual ~CodeGenerator() {} 259 260 virtual void init(CodeBuf* pCodeBuf) { 261 this->pCodeBuf = pCodeBuf; 262 pCodeBuf->setErrorSink(mErrorSink); 263 } 264 265 virtual void setErrorSink(ErrorSink* pErrorSink) { 266 mErrorSink = pErrorSink; 267 if (pCodeBuf) { 268 pCodeBuf->setErrorSink(mErrorSink); 269 } 270 } 271 272 /* Emit a function prolog. 273 * argCount is the number of arguments. 274 * Save the old value of the FP. 275 * Set the new value of the FP. 276 * Convert from the native platform calling convention to 277 * our stack-based calling convention. This may require 278 * pushing arguments from registers to the stack. 279 * Allocate "N" bytes of stack space. N isn't known yet, so 280 * just emit the instructions for adjusting the stack, and return 281 * the address to patch up. The patching will be done in 282 * functionExit(). 283 * returns address to patch with local variable size. 284 */ 285 virtual int functionEntry(int argCount) = 0; 286 287 /* Emit a function epilog. 288 * Restore the old SP and FP register values. 289 * Return to the calling function. 290 * argCount - the number of arguments to the function. 291 * localVariableAddress - returned from functionEntry() 292 * localVariableSize - the size in bytes of the local variables. 293 */ 294 virtual void functionExit(int argCount, int localVariableAddress, 295 int localVariableSize) = 0; 296 297 /* load immediate value to R0 */ 298 virtual void li(int i, Type* pType) = 0; 299 300 /* load floating point immediate value to R0 */ 301 virtual void lif(float f, Type* pType) = 0; 302 303 /* load double-precision floating point immediate value to R0 */ 304 virtual void lid(double d, Type* pType) = 0; 305 306 /* Jump to a target, and return the address of the word that 307 * holds the target data, in case it needs to be fixed up later. 308 */ 309 virtual int gjmp(int t) = 0; 310 311 /* Test R0 and jump to a target if the test succeeds. 312 * l = 0: je, l == 1: jne 313 * Return the address of the word that holds the targed data, in 314 * case it needs to be fixed up later. 315 */ 316 virtual int gtst(bool l, int t) = 0; 317 318 /* Compare TOS against R0, and store the boolean result in R0. 319 * Pops TOS. 320 * op specifies the comparison. 321 */ 322 virtual void gcmp(int op) = 0; 323 324 /* Perform the arithmetic op specified by op. TOS is the 325 * left argument, R0 is the right argument. 326 * Pops TOS. 327 */ 328 virtual void genOp(int op) = 0; 329 330 /* Compare 0 against R0, and store the boolean result in R0. 331 * op specifies the comparison. 332 */ 333 virtual void gUnaryCmp(int op) = 0; 334 335 /* Perform the arithmetic op specified by op. 0 is the 336 * left argument, R0 is the right argument. 337 */ 338 virtual void genUnaryOp(int op) = 0; 339 340 /* Push R0 onto the stack. 341 */ 342 virtual void pushR0() = 0; 343 344 /* Store R0 to the address stored in TOS. 345 * The TOS is popped. 346 * pPointerType is the type of the pointer (of the input R0). 347 */ 348 virtual void storeR0ToTOS(Type* pPointerType) = 0; 349 350 /* Load R0 from the address stored in R0. 351 * pPointerType is the type of the pointer (of the input R0). 352 */ 353 virtual void loadR0FromR0(Type* pPointerType) = 0; 354 355 /* Load the absolute address of a variable to R0. 356 * If ea <= LOCAL, then this is a local variable, or an 357 * argument, addressed relative to FP. 358 * else it is an absolute global address. 359 */ 360 virtual void leaR0(int ea, Type* pPointerType) = 0; 361 362 /* Store R0 to a variable. 363 * If ea <= LOCAL, then this is a local variable, or an 364 * argument, addressed relative to FP. 365 * else it is an absolute global address. 366 */ 367 virtual void storeR0(int ea) = 0; 368 369 /* load R0 from a variable. 370 * If ea <= LOCAL, then this is a local variable, or an 371 * argument, addressed relative to FP. 372 * else it is an absolute global address. 373 * If isIncDec is true, then the stored variable's value 374 * should be post-incremented or post-decremented, based 375 * on the value of op. 376 */ 377 virtual void loadR0(int ea, bool isIncDec, int op, Type* pType) = 0; 378 379 /** 380 * Convert R0 to the given type. 381 */ 382 virtual void convertR0(Type* pType) = 0; 383 384 /* Emit code to adjust the stack for a function call. Return the 385 * label for the address of the instruction that adjusts the 386 * stack size. This will be passed as argument "a" to 387 * endFunctionCallArguments. 388 */ 389 virtual int beginFunctionCallArguments() = 0; 390 391 /* Emit code to store R0 to the stack at byte offset l. 392 */ 393 virtual void storeR0ToArg(int l) = 0; 394 395 /* Patch the function call preamble. 396 * a is the address returned from beginFunctionCallArguments 397 * l is the number of bytes the arguments took on the stack. 398 * Typically you would also emit code to convert the argument 399 * list into whatever the native function calling convention is. 400 * On ARM for example you would pop the first 5 arguments into 401 * R0..R4 402 */ 403 virtual void endFunctionCallArguments(int a, int l) = 0; 404 405 /* Emit a call to an unknown function. The argument "symbol" needs to 406 * be stored in the location where the address should go. It forms 407 * a chain. The address will be patched later. 408 * Return the address of the word that has to be patched. 409 */ 410 virtual int callForward(int symbol, Type* pFunc) = 0; 411 412 /* Call a function using PC-relative addressing. t is the PC-relative 413 * address of the function. It has already been adjusted for the 414 * architectural jump offset, so just store it as-is. 415 */ 416 virtual void callRelative(int t, Type* pFunc) = 0; 417 418 /* Call a function pointer. L is the number of bytes the arguments 419 * take on the stack. The address of the function is stored at 420 * location SP + l. 421 */ 422 virtual void callIndirect(int l, Type* pFunc) = 0; 423 424 /* Adjust SP after returning from a function call. l is the 425 * number of bytes of arguments stored on the stack. isIndirect 426 * is true if this was an indirect call. (In which case the 427 * address of the function is stored at location SP + l.) 428 */ 429 virtual void adjustStackAfterCall(int l, bool isIndirect) = 0; 430 431 /* Print a disassembly of the assembled code to out. Return 432 * non-zero if there is an error. 433 */ 434 virtual int disassemble(FILE* out) = 0; 435 436 /* Generate a symbol at the current PC. t is the head of a 437 * linked list of addresses to patch. 438 */ 439 virtual void gsym(int t) = 0; 440 441 /* 442 * Do any cleanup work required at the end of a compile. 443 * For example, an instruction cache might need to be 444 * invalidated. 445 * Return non-zero if there is an error. 446 */ 447 virtual int finishCompile() = 0; 448 449 /** 450 * Adjust relative branches by this amount. 451 */ 452 virtual int jumpOffset() = 0; 453 454 /** 455 * Stack alignment (in bytes) for this type of data 456 */ 457 virtual size_t stackAlignment(Type* type) = 0; 458 459 /** 460 * Array element alignment (in bytes) for this type of data. 461 */ 462 virtual size_t sizeOf(Type* type) = 0; 463 464 protected: 465 /* 466 * Output a byte. Handles all values, 0..ff. 467 */ 468 void ob(int n) { 469 pCodeBuf->ob(n); 470 } 471 472 intptr_t o4(int data) { 473 return pCodeBuf->o4(data); 474 } 475 476 intptr_t getBase() { 477 return (intptr_t) pCodeBuf->getBase(); 478 } 479 480 intptr_t getPC() { 481 return pCodeBuf->getPC(); 482 } 483 484 intptr_t getSize() { 485 return pCodeBuf->getSize(); 486 } 487 488 void error(const char* fmt,...) { 489 va_list ap; 490 va_start(ap, fmt); 491 mErrorSink->verror(fmt, ap); 492 va_end(ap); 493 } 494 495 void assert(bool test) { 496 if (!test) { 497 error("code generator assertion failed."); 498 } 499 } 500 501 void setR0Type(Type* pType) { 502 mExpressionStack.back() = pType; 503 } 504 505 Type* getR0Type() { 506 return mExpressionStack.back(); 507 } 508 509 Type* getTOSType() { 510 return mExpressionStack[mExpressionStack.size()-2]; 511 } 512 513 void pushType() { 514 mExpressionStack.push_back(NULL); 515 } 516 517 void popType() { 518 mExpressionStack.pop_back(); 519 } 520 521 bool bitsSame(Type* pA, Type* pB) { 522 return collapseType(pA->tag) == collapseType(pB->tag); 523 } 524 525 TypeTag collapseType(TypeTag tag) { 526 static const TypeTag collapsedTag[] = { 527 TY_INT, TY_INT, TY_VOID, TY_FLOAT, TY_DOUBLE, TY_INT, 528 TY_VOID, TY_VOID}; 529 return collapsedTag[tag]; 530 } 531 532 private: 533 Vector<Type*> mExpressionStack; 534 CodeBuf* pCodeBuf; 535 ErrorSink* mErrorSink; 536 }; 537 538#ifdef PROVIDE_ARM_CODEGEN 539 540 class ARMCodeGenerator : public CodeGenerator { 541 public: 542 ARMCodeGenerator() {} 543 544 virtual ~ARMCodeGenerator() {} 545 546 /* returns address to patch with local variable size 547 */ 548 virtual int functionEntry(int argCount) { 549 LOG_API("functionEntry(%d);\n", argCount); 550 mStackUse = 0; 551 // sp -> arg4 arg5 ... 552 // Push our register-based arguments back on the stack 553 if (argCount > 0) { 554 int regArgCount = argCount <= 4 ? argCount : 4; 555 o4(0xE92D0000 | ((1 << argCount) - 1)); // stmfd sp!, {} 556 mStackUse += regArgCount * 4; 557 } 558 // sp -> arg0 arg1 ... 559 o4(0xE92D4800); // stmfd sp!, {fp, lr} 560 mStackUse += 2 * 4; 561 // sp, fp -> oldfp, retadr, arg0 arg1 .... 562 o4(0xE1A0B00D); // mov fp, sp 563 LOG_STACK("functionEntry: %d\n", mStackUse); 564 return o4(0xE24DD000); // sub sp, sp, # <local variables> 565 // We don't know how many local variables we are going to use, 566 // but we will round the allocation up to a multiple of 567 // STACK_ALIGNMENT, so it won't affect the stack alignment. 568 } 569 570 virtual void functionExit(int argCount, int localVariableAddress, int localVariableSize) { 571 LOG_API("functionExit(%d, %d, %d);\n", argCount, localVariableAddress, localVariableSize); 572 // Round local variable size up to a multiple of stack alignment 573 localVariableSize = ((localVariableSize + STACK_ALIGNMENT - 1) / 574 STACK_ALIGNMENT) * STACK_ALIGNMENT; 575 // Patch local variable allocation code: 576 if (localVariableSize < 0 || localVariableSize > 255) { 577 error("localVariables out of range: %d", localVariableSize); 578 } 579 *(char*) (localVariableAddress) = localVariableSize; 580 581 // sp -> locals .... fp -> oldfp, retadr, arg0, arg1, ... 582 o4(0xE1A0E00B); // mov lr, fp 583 o4(0xE59BB000); // ldr fp, [fp] 584 o4(0xE28ED004); // add sp, lr, #4 585 // sp -> retadr, arg0, ... 586 o4(0xE8BD4000); // ldmfd sp!, {lr} 587 // sp -> arg0 .... 588 if (argCount > 0) { 589 // We store the PC into the lr so we can adjust the sp before 590 // returning. We need to pull off the registers we pushed 591 // earlier. We don't need to actually store them anywhere, 592 // just adjust the stack. 593 int regArgCount = argCount <= 4 ? argCount : 4; 594 o4(0xE28DD000 | (regArgCount << 2)); // add sp, sp, #argCount << 2 595 } 596 o4(0xE12FFF1E); // bx lr 597 } 598 599 /* load immediate value */ 600 virtual void li(int t, Type* pType) { 601 LOG_API("li(%d);\n", t); 602 if (t >= 0 && t < 255) { 603 o4(0xE3A00000 + t); // mov r0, #0 604 } else if (t >= -256 && t < 0) { 605 // mvn means move constant ^ ~0 606 o4(0xE3E00001 - t); // mvn r0, #0 607 } else { 608 o4(0xE51F0000); // ldr r0, .L3 609 o4(0xEA000000); // b .L99 610 o4(t); // .L3: .word 0 611 // .L99: 612 } 613 setR0Type(pType); 614 } 615 616 virtual void lif(float f, Type* pType) { 617 union { float f; int i; } converter; 618 converter.f = f; 619 li(converter.i, pType); 620 } 621 622 virtual void lid(double d, Type* pType) { 623 union { double d; int i[2]; } converter; 624 converter.d = d; 625 error("lid: unimplemented"); 626 setR0Type(pType); 627 } 628 629 virtual int gjmp(int t) { 630 LOG_API("gjmp(%d);\n", t); 631 return o4(0xEA000000 | encodeAddress(t)); // b .L33 632 } 633 634 /* l = 0: je, l == 1: jne */ 635 virtual int gtst(bool l, int t) { 636 LOG_API("gtst(%d, %d);\n", l, t); 637 o4(0xE3500000); // cmp r0,#0 638 int branch = l ? 0x1A000000 : 0x0A000000; // bne : beq 639 return o4(branch | encodeAddress(t)); 640 } 641 642 virtual void gcmp(int op) { 643 LOG_API("gcmp(%d);\n", op); 644 o4(0xE8BD0002); // ldmfd sp!,{r1} 645 mStackUse -= 4; 646 o4(0xE1510000); // cmp r1, r1 647 switch(op) { 648 case OP_EQUALS: 649 o4(0x03A00001); // moveq r0,#1 650 o4(0x13A00000); // movne r0,#0 651 break; 652 case OP_NOT_EQUALS: 653 o4(0x03A00000); // moveq r0,#0 654 o4(0x13A00001); // movne r0,#1 655 break; 656 case OP_LESS_EQUAL: 657 o4(0xD3A00001); // movle r0,#1 658 o4(0xC3A00000); // movgt r0,#0 659 break; 660 case OP_GREATER: 661 o4(0xD3A00000); // movle r0,#0 662 o4(0xC3A00001); // movgt r0,#1 663 break; 664 case OP_GREATER_EQUAL: 665 o4(0xA3A00001); // movge r0,#1 666 o4(0xB3A00000); // movlt r0,#0 667 break; 668 case OP_LESS: 669 o4(0xA3A00000); // movge r0,#0 670 o4(0xB3A00001); // movlt r0,#1 671 break; 672 default: 673 error("Unknown comparison op %d", op); 674 break; 675 } 676 popType(); 677 } 678 679 virtual void genOp(int op) { 680 LOG_API("genOp(%d);\n", op); 681 o4(0xE8BD0002); // ldmfd sp!,{r1} 682 mStackUse -= 4; 683 switch(op) { 684 case OP_MUL: 685 o4(0x0E0000091); // mul r0,r1,r0 686 break; 687 case OP_DIV: 688 callRuntime(runtime_DIV); 689 break; 690 case OP_MOD: 691 callRuntime(runtime_MOD); 692 break; 693 case OP_PLUS: 694 o4(0xE0810000); // add r0,r1,r0 695 break; 696 case OP_MINUS: 697 o4(0xE0410000); // sub r0,r1,r0 698 break; 699 case OP_SHIFT_LEFT: 700 o4(0xE1A00011); // lsl r0,r1,r0 701 break; 702 case OP_SHIFT_RIGHT: 703 o4(0xE1A00051); // asr r0,r1,r0 704 break; 705 case OP_BIT_AND: 706 o4(0xE0010000); // and r0,r1,r0 707 break; 708 case OP_BIT_XOR: 709 o4(0xE0210000); // eor r0,r1,r0 710 break; 711 case OP_BIT_OR: 712 o4(0xE1810000); // orr r0,r1,r0 713 break; 714 case OP_BIT_NOT: 715 o4(0xE1E00000); // mvn r0, r0 716 break; 717 default: 718 error("Unimplemented op %d\n", op); 719 break; 720 } 721 popType(); 722 } 723 724 virtual void gUnaryCmp(int op) { 725 LOG_API("gcmp(%d);\n", op); 726 o4(0xE3A01000); // mov r1, #0 727 o4(0xE1510000); // cmp r1, r1 728 switch(op) { 729 case OP_NOT_EQUALS: 730 o4(0x03A00000); // moveq r0,#0 731 o4(0x13A00001); // movne r0,#1 732 break; 733 default: 734 error("Unknown unary comparison op %d", op); 735 break; 736 } 737 } 738 739 virtual void genUnaryOp(int op) { 740 LOG_API("genOp(%d);\n", op); 741 switch(op) { 742 case OP_PLUS: 743 // Do nothing 744 break; 745 case OP_MINUS: 746 o4(0xE3A01000); // mov r1, #0 747 o4(0xE0410000); // sub r0,r1,r0 748 break; 749 case OP_BIT_NOT: 750 o4(0xE1E00000); // mvn r0, r0 751 break; 752 default: 753 error("Unknown unary op %d\n", op); 754 break; 755 } 756 } 757 758 virtual void pushR0() { 759 LOG_API("pushR0();\n"); 760 o4(0xE92D0001); // stmfd sp!,{r0} 761 mStackUse += 4; 762 pushType(); 763 LOG_STACK("pushR0: %d\n", mStackUse); 764 } 765 766 virtual void storeR0ToTOS(Type* pPointerType) { 767 LOG_API("storeR0ToTOS(%d);\n", isInt); 768 assert(pPointerType->tag == TY_POINTER); 769 o4(0xE8BD0002); // ldmfd sp!,{r1} 770 mStackUse -= 4; 771 switch (pPointerType->pHead->tag) { 772 case TY_INT: 773 o4(0xE5810000); // str r0, [r1] 774 break; 775 case TY_CHAR: 776 o4(0xE5C10000); // strb r0, [r1] 777 break; 778 default: 779 error("storeR0ToTOS: unimplemented type"); 780 break; 781 } 782 popType(); 783 } 784 785 virtual void loadR0FromR0(Type* pPointerType) { 786 LOG_API("loadR0FromR0(%d);\n", pPointerType); 787 assert(pPointerType->tag == TY_POINTER); 788 switch (pPointerType->pHead->tag) { 789 case TY_INT: 790 o4(0xE5900000); // ldr r0, [r0] 791 break; 792 case TY_CHAR: 793 o4(0xE5D00000); // ldrb r0, [r0] 794 break; 795 default: 796 error("loadR0FromR0: unimplemented type"); 797 break; 798 } 799 setR0Type(pPointerType->pHead); 800 } 801 802 virtual void leaR0(int ea, Type* pPointerType) { 803 LOG_API("leaR0(%d);\n", ea); 804 if (ea < LOCAL) { 805 // Local, fp relative 806 if (ea < -1023 || ea > 1023 || ((ea & 3) != 0)) { 807 error("Offset out of range: %08x", ea); 808 } 809 if (ea < 0) { 810 o4(0xE24B0F00 | (0xff & ((-ea) >> 2))); // sub r0, fp, #ea 811 } else { 812 o4(0xE28B0F00 | (0xff & (ea >> 2))); // add r0, fp, #ea 813 } 814 } else { 815 // Global, absolute. 816 o4(0xE59F0000); // ldr r0, .L1 817 o4(0xEA000000); // b .L99 818 o4(ea); // .L1: .word 0 819 // .L99: 820 } 821 setR0Type(pPointerType); 822 } 823 824 virtual void storeR0(int ea) { 825 LOG_API("storeR0(%d);\n", ea); 826 if (ea < LOCAL) { 827 // Local, fp relative 828 if (ea < -4095 || ea > 4095) { 829 error("Offset out of range: %08x", ea); 830 } 831 if (ea < 0) { 832 o4(0xE50B0000 | (0xfff & (-ea))); // str r0, [fp,#-ea] 833 } else { 834 o4(0xE58B0000 | (0xfff & ea)); // str r0, [fp,#ea] 835 } 836 } else{ 837 // Global, absolute 838 o4(0xE59F1000); // ldr r1, .L1 839 o4(0xEA000000); // b .L99 840 o4(ea); // .L1: .word 0 841 o4(0xE5810000); // .L99: str r0, [r1] 842 } 843 } 844 845 virtual void loadR0(int ea, bool isIncDec, int op, Type* pType) { 846 LOG_API("loadR0(%d, %d, %d);\n", ea, isIncDec, op); 847 if (ea < LOCAL) { 848 // Local, fp relative 849 if (ea < -4095 || ea > 4095) { 850 error("Offset out of range: %08x", ea); 851 } 852 if (ea < 0) { 853 o4(0xE51B0000 | (0xfff & (-ea))); // ldr r0, [fp,#-ea] 854 } else { 855 o4(0xE59B0000 | (0xfff & ea)); // ldr r0, [fp,#ea] 856 } 857 } else { 858 // Global, absolute 859 o4(0xE59F2000); // ldr r2, .L1 860 o4(0xEA000000); // b .L99 861 o4(ea); // .L1: .word ea 862 o4(0xE5920000); // .L99: ldr r0, [r2] 863 } 864 865 if (isIncDec) { 866 switch (op) { 867 case OP_INCREMENT: 868 o4(0xE2801001); // add r1, r0, #1 869 break; 870 case OP_DECREMENT: 871 o4(0xE2401001); // sub r1, r0, #1 872 break; 873 default: 874 error("unknown opcode: %d", op); 875 } 876 if (ea < LOCAL) { 877 // Local, fp relative 878 // Don't need range check, was already checked above 879 if (ea < 0) { 880 o4(0xE50B1000 | (0xfff & (-ea))); // str r1, [fp,#-ea] 881 } else { 882 o4(0xE58B1000 | (0xfff & ea)); // str r1, [fp,#ea] 883 } 884 } else{ 885 // Global, absolute 886 // r2 is already set up from before. 887 o4(0xE5821000); // str r1, [r2] 888 } 889 } 890 setR0Type(pType); 891 } 892 893 virtual void convertR0(Type* pType){ 894 if (bitsSame(pType, getR0Type())) { 895 return; 896 } 897 error("Incompatible types"); 898 } 899 900 virtual int beginFunctionCallArguments() { 901 LOG_API("beginFunctionCallArguments();\n"); 902 return o4(0xE24DDF00); // Placeholder 903 } 904 905 virtual void storeR0ToArg(int l) { 906 LOG_API("storeR0ToArg(%d);\n", l); 907 if (l < 0 || l > 4096-4) { 908 error("l out of range for stack offset: 0x%08x", l); 909 } 910 o4(0xE58D0000 + l); // str r0, [sp, #4] 911 } 912 913 virtual void endFunctionCallArguments(int a, int l) { 914 LOG_API("endFunctionCallArguments(0x%08x, %d);\n", a, l); 915 int argCount = l >> 2; 916 int argumentStackUse = l; 917 if (argCount > 0) { 918 int regArgCount = argCount > 4 ? 4 : argCount; 919 argumentStackUse -= regArgCount * 4; 920 o4(0xE8BD0000 | ((1 << regArgCount) - 1)); // ldmfd sp!,{} 921 } 922 mStackUse += argumentStackUse; 923 924 // Align stack. 925 int missalignment = mStackUse - ((mStackUse / STACK_ALIGNMENT) 926 * STACK_ALIGNMENT); 927 mStackAlignmentAdjustment = 0; 928 if (missalignment > 0) { 929 mStackAlignmentAdjustment = STACK_ALIGNMENT - missalignment; 930 } 931 l += mStackAlignmentAdjustment; 932 933 if (l < 0 || l > 0x3FC) { 934 error("L out of range for stack adjustment: 0x%08x", l); 935 } 936 * (int*) a = 0xE24DDF00 | (l >> 2); // sub sp, sp, #0 << 2 937 mStackUse += mStackAlignmentAdjustment; 938 LOG_STACK("endFunctionCallArguments mStackUse: %d, mStackAlignmentAdjustment %d\n", 939 mStackUse, mStackAlignmentAdjustment); 940 } 941 942 virtual int callForward(int symbol, Type* pFunc) { 943 LOG_API("callForward(%d);\n", symbol); 944 setR0Type(pFunc->pHead); 945 // Forward calls are always short (local) 946 return o4(0xEB000000 | encodeAddress(symbol)); 947 } 948 949 virtual void callRelative(int t, Type* pFunc) { 950 LOG_API("callRelative(%d);\n", t); 951 setR0Type(pFunc->pHead); 952 int abs = t + getPC() + jumpOffset(); 953 LOG_API("abs=%d (0x%08x)\n", abs, abs); 954 if (t >= - (1 << 25) && t < (1 << 25)) { 955 o4(0xEB000000 | encodeAddress(t)); 956 } else { 957 // Long call. 958 o4(0xE59FC000); // ldr r12, .L1 959 o4(0xEA000000); // b .L99 960 o4(t - 12); // .L1: .word 0 961 o4(0xE08CC00F); // .L99: add r12,pc 962 o4(0xE12FFF3C); // blx r12 963 } 964 } 965 966 virtual void callIndirect(int l, Type* pFunc) { 967 LOG_API("callIndirect(%d);\n", l); 968 setR0Type(pFunc->pHead); 969 int argCount = l >> 2; 970 int poppedArgs = argCount > 4 ? 4 : argCount; 971 int adjustedL = l - (poppedArgs << 2) + mStackAlignmentAdjustment; 972 if (adjustedL < 0 || adjustedL > 4096-4) { 973 error("l out of range for stack offset: 0x%08x", l); 974 } 975 o4(0xE59DC000 | (0xfff & adjustedL)); // ldr r12, [sp,#adjustedL] 976 o4(0xE12FFF3C); // blx r12 977 } 978 979 virtual void adjustStackAfterCall(int l, bool isIndirect) { 980 LOG_API("adjustStackAfterCall(%d, %d);\n", l, isIndirect); 981 int argCount = l >> 2; 982 int stackArgs = argCount > 4 ? argCount - 4 : 0; 983 int stackUse = stackArgs + (isIndirect ? 1 : 0) 984 + (mStackAlignmentAdjustment >> 2); 985 if (stackUse) { 986 if (stackUse < 0 || stackUse > 255) { 987 error("L out of range for stack adjustment: 0x%08x", l); 988 } 989 o4(0xE28DDF00 | stackUse); // add sp, sp, #stackUse << 2 990 mStackUse -= stackUse * 4; 991 LOG_STACK("adjustStackAfterCall: %d\n", mStackUse); 992 } 993 } 994 995 virtual int jumpOffset() { 996 return 8; 997 } 998 999 /* output a symbol and patch all calls to it */ 1000 virtual void gsym(int t) { 1001 LOG_API("gsym(0x%x)\n", t); 1002 int n; 1003 int base = getBase(); 1004 int pc = getPC(); 1005 LOG_API("pc = 0x%x\n", pc); 1006 while (t) { 1007 int data = * (int*) t; 1008 int decodedOffset = ((BRANCH_REL_ADDRESS_MASK & data) << 2); 1009 if (decodedOffset == 0) { 1010 n = 0; 1011 } else { 1012 n = base + decodedOffset; /* next value */ 1013 } 1014 *(int *) t = (data & ~BRANCH_REL_ADDRESS_MASK) 1015 | encodeRelAddress(pc - t - 8); 1016 t = n; 1017 } 1018 } 1019 1020 virtual int finishCompile() { 1021#if defined(__arm__) 1022 const long base = long(getBase()); 1023 const long curr = long(getPC()); 1024 int err = cacheflush(base, curr, 0); 1025 return err; 1026#else 1027 return 0; 1028#endif 1029 } 1030 1031 virtual int disassemble(FILE* out) { 1032#ifdef ENABLE_ARM_DISASSEMBLY 1033 disasmOut = out; 1034 disasm_interface_t di; 1035 di.di_readword = disassemble_readword; 1036 di.di_printaddr = disassemble_printaddr; 1037 di.di_printf = disassemble_printf; 1038 1039 int base = getBase(); 1040 int pc = getPC(); 1041 for(int i = base; i < pc; i += 4) { 1042 fprintf(out, "%08x: %08x ", i, *(int*) i); 1043 ::disasm(&di, i, 0); 1044 } 1045#endif 1046 return 0; 1047 } 1048 1049 /** 1050 * Stack alignment (in bytes) for this type of data 1051 */ 1052 virtual size_t stackAlignment(Type* pType){ 1053 switch(pType->tag) { 1054 case TY_DOUBLE: 1055 return 8; 1056 default: 1057 return 4; 1058 } 1059 } 1060 1061 /** 1062 * Array element alignment (in bytes) for this type of data. 1063 */ 1064 virtual size_t sizeOf(Type* pType){ 1065 switch(pType->tag) { 1066 case TY_INT: 1067 return 4; 1068 case TY_CHAR: 1069 return 1; 1070 default: 1071 return 0; 1072 case TY_FLOAT: 1073 return 4; 1074 case TY_DOUBLE: 1075 return 8; 1076 case TY_POINTER: 1077 return 4; 1078 } 1079 } 1080 private: 1081 static FILE* disasmOut; 1082 1083 static u_int 1084 disassemble_readword(u_int address) 1085 { 1086 return(*((u_int *)address)); 1087 } 1088 1089 static void 1090 disassemble_printaddr(u_int address) 1091 { 1092 fprintf(disasmOut, "0x%08x", address); 1093 } 1094 1095 static void 1096 disassemble_printf(const char *fmt, ...) { 1097 va_list ap; 1098 va_start(ap, fmt); 1099 vfprintf(disasmOut, fmt, ap); 1100 va_end(ap); 1101 } 1102 1103 static const int BRANCH_REL_ADDRESS_MASK = 0x00ffffff; 1104 1105 /** Encode a relative address that might also be 1106 * a label. 1107 */ 1108 int encodeAddress(int value) { 1109 int base = getBase(); 1110 if (value >= base && value <= getPC() ) { 1111 // This is a label, encode it relative to the base. 1112 value = value - base; 1113 } 1114 return encodeRelAddress(value); 1115 } 1116 1117 int encodeRelAddress(int value) { 1118 return BRANCH_REL_ADDRESS_MASK & (value >> 2); 1119 } 1120 1121 typedef int (*int2FnPtr)(int a, int b); 1122 void callRuntime(int2FnPtr fn) { 1123 o4(0xE59F2000); // ldr r2, .L1 1124 o4(0xEA000000); // b .L99 1125 o4((int) fn); //.L1: .word fn 1126 o4(0xE12FFF32); //.L99: blx r2 1127 } 1128 1129 static int runtime_DIV(int a, int b) { 1130 return b / a; 1131 } 1132 1133 static int runtime_MOD(int a, int b) { 1134 return b % a; 1135 } 1136 1137 static const int STACK_ALIGNMENT = 8; 1138 int mStackUse; 1139 // This variable holds the amount we adjusted the stack in the most 1140 // recent endFunctionCallArguments call. It's examined by the 1141 // following adjustStackAfterCall call. 1142 int mStackAlignmentAdjustment; 1143 }; 1144 1145#endif // PROVIDE_ARM_CODEGEN 1146 1147#ifdef PROVIDE_X86_CODEGEN 1148 1149 class X86CodeGenerator : public CodeGenerator { 1150 public: 1151 X86CodeGenerator() {} 1152 virtual ~X86CodeGenerator() {} 1153 1154 /* returns address to patch with local variable size 1155 */ 1156 virtual int functionEntry(int argCount) { 1157 o(0xe58955); /* push %ebp, mov %esp, %ebp */ 1158 return oad(0xec81, 0); /* sub $xxx, %esp */ 1159 } 1160 1161 virtual void functionExit(int argCount, int localVariableAddress, int localVariableSize) { 1162 o(0xc3c9); /* leave, ret */ 1163 *(int *) localVariableAddress = localVariableSize; /* save local variables */ 1164 } 1165 1166 /* load immediate value */ 1167 virtual void li(int i, Type* pType) { 1168 oad(0xb8, i); /* mov $xx, %eax */ 1169 setR0Type(pType); 1170 } 1171 1172 virtual void lif(float f, Type* pType) { 1173 union { float f; int i; } converter; 1174 converter.f = f; 1175 setR0Type(pType); 1176 error("unimplemented: lif"); 1177 } 1178 1179 virtual void lid(double d, Type* pType) { 1180 union { double d; int i[2]; } converter; 1181 converter.d = d; 1182 setR0Type(pType); 1183 error("unimplemented: lid"); 1184 } 1185 1186 virtual int gjmp(int t) { 1187 return psym(0xe9, t); 1188 } 1189 1190 /* l = 0: je, l == 1: jne */ 1191 virtual int gtst(bool l, int t) { 1192 o(0x0fc085); /* test %eax, %eax, je/jne xxx */ 1193 return psym(0x84 + l, t); 1194 } 1195 1196 virtual void gcmp(int op) { 1197 int t = decodeOp(op); 1198 o(0x59); /* pop %ecx */ 1199 o(0xc139); /* cmp %eax,%ecx */ 1200 li(0, NULL); 1201 o(0x0f); /* setxx %al */ 1202 o(t + 0x90); 1203 o(0xc0); 1204 popType(); 1205 } 1206 1207 virtual void genOp(int op) { 1208 o(0x59); /* pop %ecx */ 1209 o(decodeOp(op)); 1210 if (op == OP_MOD) 1211 o(0x92); /* xchg %edx, %eax */ 1212 popType(); 1213 } 1214 1215 virtual void gUnaryCmp(int op) { 1216 oad(0xb9, 0); /* movl $0, %ecx */ 1217 int t = decodeOp(op); 1218 o(0xc139); /* cmp %eax,%ecx */ 1219 li(0, NULL); 1220 o(0x0f); /* setxx %al */ 1221 o(t + 0x90); 1222 o(0xc0); 1223 } 1224 1225 virtual void genUnaryOp(int op) { 1226 oad(0xb9, 0); /* movl $0, %ecx */ 1227 o(decodeOp(op)); 1228 } 1229 1230 virtual void pushR0() { 1231 o(0x50); /* push %eax */ 1232 pushType(); 1233 } 1234 1235 virtual void storeR0ToTOS(Type* pPointerType) { 1236 assert(pPointerType->tag == TY_POINTER); 1237 o(0x59); /* pop %ecx */ 1238 popType(); 1239 switch (pPointerType->pHead->tag) { 1240 case TY_INT: 1241 o(0x0189); /* movl %eax/%al, (%ecx) */ 1242 break; 1243 case TY_CHAR: 1244 o(0x0188); /* movl %eax/%al, (%ecx) */ 1245 break; 1246 default: 1247 error("storeR0ToTOS: unsupported type"); 1248 break; 1249 } 1250 } 1251 1252 virtual void loadR0FromR0(Type* pPointerType) { 1253 assert(pPointerType->tag == TY_POINTER); 1254 switch (pPointerType->pHead->tag) { 1255 case TY_INT: 1256 o(0x8b); /* mov (%eax), %eax */ 1257 break; 1258 case TY_CHAR: 1259 o(0xbe0f); /* movsbl (%eax), %eax */ 1260 break; 1261 default: 1262 error("loadR0FromR0: unsupported type"); 1263 break; 1264 } 1265 ob(0); /* add zero in code */ 1266 setR0Type(pPointerType->pHead); 1267 } 1268 1269 virtual void leaR0(int ea, Type* pPointerType) { 1270 gmov(10, ea); /* leal EA, %eax */ 1271 setR0Type(pPointerType); 1272 } 1273 1274 virtual void storeR0(int ea) { 1275 gmov(6, ea); /* mov %eax, EA */ 1276 } 1277 1278 virtual void loadR0(int ea, bool isIncDec, int op, Type* pType) { 1279 gmov(8, ea); /* mov EA, %eax */ 1280 if (isIncDec) { 1281 /* Implement post-increment or post decrement. 1282 */ 1283 gmov(0, ea); /* 83 ADD */ 1284 o(decodeOp(op)); 1285 } 1286 setR0Type(pType); 1287 } 1288 1289 virtual void convertR0(Type* pType){ 1290 if (bitsSame(pType, getR0Type())) { 1291 return; 1292 } 1293 error("convertR0: unsupported conversion %d <- %d", pType->tag, 1294 getR0Type()->tag); 1295 } 1296 1297 virtual int beginFunctionCallArguments() { 1298 return oad(0xec81, 0); /* sub $xxx, %esp */ 1299 } 1300 1301 virtual void storeR0ToArg(int l) { 1302 oad(0x248489, l); /* movl %eax, xxx(%esp) */ 1303 } 1304 1305 virtual void endFunctionCallArguments(int a, int l) { 1306 * (int*) a = l; 1307 } 1308 1309 virtual int callForward(int symbol, Type* pFunc) { 1310 setR0Type(pFunc->pHead); 1311 return psym(0xe8, symbol); /* call xxx */ 1312 } 1313 1314 virtual void callRelative(int t, Type* pFunc) { 1315 setR0Type(pFunc->pHead); 1316 psym(0xe8, t); /* call xxx */ 1317 } 1318 1319 virtual void callIndirect(int l, Type* pFunc) { 1320 setR0Type(pFunc->pHead); 1321 oad(0x2494ff, l); /* call *xxx(%esp) */ 1322 } 1323 1324 virtual void adjustStackAfterCall(int l, bool isIndirect) { 1325 if (isIndirect) { 1326 l += 4; 1327 } 1328 if (l > 0) { 1329 oad(0xc481, l); /* add $xxx, %esp */ 1330 } 1331 } 1332 1333 virtual int jumpOffset() { 1334 return 5; 1335 } 1336 1337 virtual int disassemble(FILE* out) { 1338 return 0; 1339 } 1340 1341 /* output a symbol and patch all calls to it */ 1342 virtual void gsym(int t) { 1343 int n; 1344 int pc = getPC(); 1345 while (t) { 1346 n = *(int *) t; /* next value */ 1347 *(int *) t = pc - t - 4; 1348 t = n; 1349 } 1350 } 1351 1352 virtual int finishCompile() { 1353 size_t pagesize = 4096; 1354 size_t base = (size_t) getBase() & ~ (pagesize - 1); 1355 size_t top = ((size_t) getPC() + pagesize - 1) & ~ (pagesize - 1); 1356 int err = mprotect((void*) base, top - base, PROT_READ | PROT_WRITE | PROT_EXEC); 1357 if (err) { 1358 error("mprotect() failed: %d", errno); 1359 } 1360 return err; 1361 } 1362 1363 /** 1364 * Stack alignment (in bytes) for this type of data 1365 */ 1366 virtual size_t stackAlignment(Type* pType){ 1367 switch(pType->tag) { 1368 case TY_DOUBLE: 1369 return 8; 1370 default: 1371 return 4; 1372 } 1373 } 1374 1375 /** 1376 * Array element alignment (in bytes) for this type of data. 1377 */ 1378 virtual size_t sizeOf(Type* pType){ 1379 switch(pType->tag) { 1380 case TY_INT: 1381 return 4; 1382 case TY_CHAR: 1383 return 1; 1384 default: 1385 return 0; 1386 case TY_FLOAT: 1387 return 4; 1388 case TY_DOUBLE: 1389 return 8; 1390 case TY_POINTER: 1391 return 4; 1392 } 1393 } 1394 1395 private: 1396 1397 /** Output 1 to 4 bytes. 1398 * 1399 */ 1400 void o(int n) { 1401 /* cannot use unsigned, so we must do a hack */ 1402 while (n && n != -1) { 1403 ob(n & 0xff); 1404 n = n >> 8; 1405 } 1406 } 1407 1408 /* psym is used to put an instruction with a data field which is a 1409 reference to a symbol. It is in fact the same as oad ! */ 1410 int psym(int n, int t) { 1411 return oad(n, t); 1412 } 1413 1414 /* instruction + address */ 1415 int oad(int n, int t) { 1416 o(n); 1417 int result = getPC(); 1418 o4(t); 1419 return result; 1420 } 1421 1422 1423 static const int operatorHelper[]; 1424 1425 int decodeOp(int op) { 1426 if (op < 0 || op > OP_COUNT) { 1427 error("Out-of-range operator: %d\n", op); 1428 op = 0; 1429 } 1430 return operatorHelper[op]; 1431 } 1432 1433 void gmov(int l, int t) { 1434 o(l + 0x83); 1435 oad((t > -LOCAL && t < LOCAL) << 7 | 5, t); 1436 } 1437 }; 1438 1439#endif // PROVIDE_X86_CODEGEN 1440 1441#ifdef PROVIDE_TRACE_CODEGEN 1442 class TraceCodeGenerator : public CodeGenerator { 1443 private: 1444 CodeGenerator* mpBase; 1445 1446 public: 1447 TraceCodeGenerator(CodeGenerator* pBase) { 1448 mpBase = pBase; 1449 } 1450 1451 virtual ~TraceCodeGenerator() { 1452 delete mpBase; 1453 } 1454 1455 virtual void init(CodeBuf* pCodeBuf) { 1456 mpBase->init(pCodeBuf); 1457 } 1458 1459 void setErrorSink(ErrorSink* pErrorSink) { 1460 mpBase->setErrorSink(pErrorSink); 1461 } 1462 1463 /* returns address to patch with local variable size 1464 */ 1465 virtual int functionEntry(int argCount) { 1466 int result = mpBase->functionEntry(argCount); 1467 fprintf(stderr, "functionEntry(%d) -> %d\n", argCount, result); 1468 return result; 1469 } 1470 1471 virtual void functionExit(int argCount, int localVariableAddress, int localVariableSize) { 1472 fprintf(stderr, "functionExit(%d, %d, %d)\n", 1473 argCount, localVariableAddress, localVariableSize); 1474 mpBase->functionExit(argCount, localVariableAddress, localVariableSize); 1475 } 1476 1477 /* load immediate value */ 1478 virtual void li(int t, Type* pType) { 1479 fprintf(stderr, "li(%d)\n", t); 1480 mpBase->li(t, pType); 1481 } 1482 1483 virtual void lif(float f, Type* pType) { 1484 fprintf(stderr, "lif(%g)\n", f); 1485 mpBase->lif(f, pType); 1486 } 1487 1488 virtual void lid(double d, Type* pType) { 1489 fprintf(stderr, "lid(%g)\n", d); 1490 mpBase->lid(d, pType); 1491 } 1492 1493 virtual int gjmp(int t) { 1494 int result = mpBase->gjmp(t); 1495 fprintf(stderr, "gjmp(%d) = %d\n", t, result); 1496 return result; 1497 } 1498 1499 /* l = 0: je, l == 1: jne */ 1500 virtual int gtst(bool l, int t) { 1501 int result = mpBase->gtst(l, t); 1502 fprintf(stderr, "gtst(%d,%d) = %d\n", l, t, result); 1503 return result; 1504 } 1505 1506 virtual void gcmp(int op) { 1507 fprintf(stderr, "gcmp(%d)\n", op); 1508 mpBase->gcmp(op); 1509 } 1510 1511 virtual void genOp(int op) { 1512 fprintf(stderr, "genOp(%d)\n", op); 1513 mpBase->genOp(op); 1514 } 1515 1516 1517 virtual void gUnaryCmp(int op) { 1518 fprintf(stderr, "gUnaryCmp(%d)\n", op); 1519 mpBase->gUnaryCmp(op); 1520 } 1521 1522 virtual void genUnaryOp(int op) { 1523 fprintf(stderr, "genUnaryOp(%d)\n", op); 1524 mpBase->genUnaryOp(op); 1525 } 1526 1527 virtual void pushR0() { 1528 fprintf(stderr, "pushR0()\n"); 1529 mpBase->pushR0(); 1530 } 1531 1532 virtual void storeR0ToTOS(Type* pPointerType) { 1533 fprintf(stderr, "storeR0ToTOS(%d)\n", pPointerType->pHead->tag); 1534 mpBase->storeR0ToTOS(pPointerType); 1535 } 1536 1537 virtual void loadR0FromR0(Type* pPointerType) { 1538 fprintf(stderr, "loadR0FromR0(%d)\n", pPointerType->pHead->tag); 1539 mpBase->loadR0FromR0(pPointerType); 1540 } 1541 1542 virtual void leaR0(int ea, Type* pPointerType) { 1543 fprintf(stderr, "leaR0(%d)\n", ea); 1544 mpBase->leaR0(ea, pPointerType); 1545 } 1546 1547 virtual void storeR0(int ea) { 1548 fprintf(stderr, "storeR0(%d)\n", ea); 1549 mpBase->storeR0(ea); 1550 } 1551 1552 virtual void loadR0(int ea, bool isIncDec, int op, Type* pType) { 1553 fprintf(stderr, "loadR0(%d, %d, %d)\n", ea, isIncDec, op); 1554 mpBase->loadR0(ea, isIncDec, op, pType); 1555 } 1556 1557 virtual void convertR0(Type* pType){ 1558 fprintf(stderr, "convertR0(pType)\n"); 1559 mpBase->convertR0(pType); 1560 } 1561 1562 virtual int beginFunctionCallArguments() { 1563 int result = mpBase->beginFunctionCallArguments(); 1564 fprintf(stderr, "beginFunctionCallArguments() = %d\n", result); 1565 return result; 1566 } 1567 1568 virtual void storeR0ToArg(int l) { 1569 fprintf(stderr, "storeR0ToArg(%d)\n", l); 1570 mpBase->storeR0ToArg(l); 1571 } 1572 1573 virtual void endFunctionCallArguments(int a, int l) { 1574 fprintf(stderr, "endFunctionCallArguments(%d, %d)\n", a, l); 1575 mpBase->endFunctionCallArguments(a, l); 1576 } 1577 1578 virtual int callForward(int symbol, Type* pFunc) { 1579 int result = mpBase->callForward(symbol, pFunc); 1580 fprintf(stderr, "callForward(%d) = %d\n", symbol, result); 1581 return result; 1582 } 1583 1584 virtual void callRelative(int t, Type* pFunc) { 1585 fprintf(stderr, "callRelative(%d)\n", t); 1586 mpBase->callRelative(t, pFunc); 1587 } 1588 1589 virtual void callIndirect(int l, Type* pFunc) { 1590 fprintf(stderr, "callIndirect(%d)\n", l); 1591 mpBase->callIndirect(l, pFunc); 1592 } 1593 1594 virtual void adjustStackAfterCall(int l, bool isIndirect) { 1595 fprintf(stderr, "adjustStackAfterCall(%d, %d)\n", l, isIndirect); 1596 mpBase->adjustStackAfterCall(l, isIndirect); 1597 } 1598 1599 virtual int jumpOffset() { 1600 return mpBase->jumpOffset(); 1601 } 1602 1603 virtual int disassemble(FILE* out) { 1604 return mpBase->disassemble(out); 1605 } 1606 1607 /* output a symbol and patch all calls to it */ 1608 virtual void gsym(int t) { 1609 fprintf(stderr, "gsym(%d)\n", t); 1610 mpBase->gsym(t); 1611 } 1612 1613 virtual int finishCompile() { 1614 int result = mpBase->finishCompile(); 1615 fprintf(stderr, "finishCompile() = %d\n", result); 1616 return result; 1617 } 1618 1619 /** 1620 * Stack alignment (in bytes) for this type of data 1621 */ 1622 virtual size_t stackAlignment(Type* pType){ 1623 return mpBase->stackAlignment(pType); 1624 } 1625 1626 /** 1627 * Array element alignment (in bytes) for this type of data. 1628 */ 1629 virtual size_t sizeOf(Type* pType){ 1630 return mpBase->sizeOf(pType); 1631 } 1632 }; 1633 1634#endif // PROVIDE_TRACE_CODEGEN 1635 1636 class Arena { 1637 public: 1638 // Used to record a given allocation amount. 1639 // Used: 1640 // Mark mark = arena.mark(); 1641 // ... lots of arena.allocate() 1642 // arena.free(mark); 1643 1644 struct Mark { 1645 size_t chunk; 1646 size_t offset; 1647 }; 1648 1649 Arena() { 1650 mCurrentChunk = 0; 1651 Chunk start(CHUNK_SIZE); 1652 mData.push_back(start); 1653 } 1654 1655 ~Arena() { 1656 for(size_t i = 0; i < mData.size(); i++) { 1657 mData[i].free(); 1658 } 1659 } 1660 1661 // Alloc using the standard alignment size safe for any variable 1662 void* alloc(size_t size) { 1663 return alloc(size, 8); 1664 } 1665 1666 Mark mark(){ 1667 Mark result; 1668 result.chunk = mCurrentChunk; 1669 result.offset = mData[mCurrentChunk].mOffset; 1670 return result; 1671 } 1672 1673 void freeToMark(const Mark& mark) { 1674 mCurrentChunk = mark.chunk; 1675 mData[mCurrentChunk].mOffset = mark.offset; 1676 } 1677 1678 private: 1679 // Allocate memory aligned to a given size 1680 // and a given power-of-two-sized alignment (e.g. 1,2,4,8,...) 1681 // Memory is not zero filled. 1682 1683 void* alloc(size_t size, size_t alignment) { 1684 while (size > mData[mCurrentChunk].remainingCapacity(alignment)) { 1685 if (mCurrentChunk + 1 < mData.size()) { 1686 mCurrentChunk++; 1687 } else { 1688 size_t allocSize = CHUNK_SIZE; 1689 if (allocSize < size + alignment - 1) { 1690 allocSize = size + alignment - 1; 1691 } 1692 Chunk chunk(allocSize); 1693 mData.push_back(chunk); 1694 mCurrentChunk++; 1695 } 1696 } 1697 return mData[mCurrentChunk].allocate(size, alignment); 1698 } 1699 1700 static const size_t CHUNK_SIZE = 128*1024; 1701 // Note: this class does not deallocate its 1702 // memory when it's destroyed. It depends upon 1703 // its parent to deallocate the memory. 1704 struct Chunk { 1705 Chunk() { 1706 mpData = 0; 1707 mSize = 0; 1708 mOffset = 0; 1709 } 1710 1711 Chunk(size_t size) { 1712 mSize = size; 1713 mpData = (char*) malloc(size); 1714 mOffset = 0; 1715 } 1716 1717 ~Chunk() { 1718 // Doesn't deallocate memory. 1719 } 1720 1721 void* allocate(size_t size, size_t alignment) { 1722 size_t alignedOffset = aligned(mOffset, alignment); 1723 void* result = mpData + alignedOffset; 1724 mOffset = alignedOffset + size; 1725 return result; 1726 } 1727 1728 void free() { 1729 if (mpData) { 1730 ::free(mpData); 1731 mpData = 0; 1732 } 1733 } 1734 1735 size_t remainingCapacity(size_t alignment) { 1736 return aligned(mSize, alignment) - aligned(mOffset, alignment); 1737 } 1738 1739 // Assume alignment is a power of two 1740 inline size_t aligned(size_t v, size_t alignment) { 1741 size_t mask = alignment-1; 1742 return (v + mask) & ~mask; 1743 } 1744 1745 char* mpData; 1746 size_t mSize; 1747 size_t mOffset; 1748 }; 1749 1750 size_t mCurrentChunk; 1751 1752 Vector<Chunk> mData; 1753 }; 1754 1755 struct VariableInfo; 1756 1757 struct Token { 1758 int hash; 1759 size_t length; 1760 char* pText; 1761 tokenid_t id; 1762 1763 // Current values for the token 1764 char* mpMacroDefinition; 1765 VariableInfo* mpVariableInfo; 1766 }; 1767 1768 class TokenTable { 1769 public: 1770 // Don't use 0..0xff, allows characters and operators to be tokens too. 1771 1772 static const int TOKEN_BASE = 0x100; 1773 TokenTable() { 1774 mpMap = hashmapCreate(128, hashFn, equalsFn); 1775 } 1776 1777 ~TokenTable() { 1778 hashmapFree(mpMap); 1779 } 1780 1781 void setArena(Arena* pArena) { 1782 mpArena = pArena; 1783 } 1784 1785 // Returns a token for a given string of characters. 1786 tokenid_t intern(const char* pText, size_t length) { 1787 Token probe; 1788 int hash = hashmapHash((void*) pText, length); 1789 { 1790 Token probe; 1791 probe.hash = hash; 1792 probe.length = length; 1793 probe.pText = (char*) pText; 1794 Token* pValue = (Token*) hashmapGet(mpMap, &probe); 1795 if (pValue) { 1796 return pValue->id; 1797 } 1798 } 1799 1800 Token* pToken = (Token*) mpArena->alloc(sizeof(Token)); 1801 memset(pToken, 0, sizeof(*pToken)); 1802 pToken->hash = hash; 1803 pToken->length = length; 1804 pToken->pText = (char*) mpArena->alloc(length + 1); 1805 memcpy(pToken->pText, pText, length); 1806 pToken->pText[length] = 0; 1807 pToken->id = mTokens.size() + TOKEN_BASE; 1808 mTokens.push_back(pToken); 1809 hashmapPut(mpMap, pToken, pToken); 1810 return pToken->id; 1811 } 1812 1813 // Return the Token for a given tokenid. 1814 Token& operator[](tokenid_t id) { 1815 return *mTokens[id - TOKEN_BASE]; 1816 } 1817 1818 inline size_t size() { 1819 return mTokens.size(); 1820 } 1821 1822 private: 1823 1824 static int hashFn(void* pKey) { 1825 Token* pToken = (Token*) pKey; 1826 return pToken->hash; 1827 } 1828 1829 static bool equalsFn(void* keyA, void* keyB) { 1830 Token* pTokenA = (Token*) keyA; 1831 Token* pTokenB = (Token*) keyB; 1832 // Don't need to compare hash values, they should always be equal 1833 return pTokenA->length == pTokenB->length 1834 && strcmp(pTokenA->pText, pTokenB->pText) == 0; 1835 } 1836 1837 Hashmap* mpMap; 1838 Vector<Token*> mTokens; 1839 Arena* mpArena; 1840 }; 1841 1842 class InputStream { 1843 public: 1844 int getChar() { 1845 if (bumpLine) { 1846 line++; 1847 bumpLine = false; 1848 } 1849 int ch = get(); 1850 if (ch == '\n') { 1851 bumpLine = true; 1852 } 1853 return ch; 1854 } 1855 int getLine() { 1856 return line; 1857 } 1858 protected: 1859 InputStream() : 1860 line(1), bumpLine(false) { 1861 } 1862 private: 1863 virtual int get() = 0; 1864 int line; 1865 bool bumpLine; 1866 }; 1867 1868 class FileInputStream : public InputStream { 1869 public: 1870 FileInputStream(FILE* in) : f(in) {} 1871 private: 1872 virtual int get() { return fgetc(f); } 1873 FILE* f; 1874 }; 1875 1876 class TextInputStream : public InputStream { 1877 public: 1878 TextInputStream(const char* text, size_t textLength) 1879 : pText(text), mTextLength(textLength), mPosition(0) { 1880 } 1881 1882 private: 1883 virtual int get() { 1884 return mPosition < mTextLength ? pText[mPosition++] : EOF; 1885 } 1886 1887 const char* pText; 1888 size_t mTextLength; 1889 size_t mPosition; 1890 }; 1891 1892 class String { 1893 public: 1894 String() { 1895 mpBase = 0; 1896 mUsed = 0; 1897 mSize = 0; 1898 } 1899 1900 String(const char* item, int len, bool adopt) { 1901 if (len < 0) { 1902 len = strlen(item); 1903 } 1904 if (adopt) { 1905 mpBase = (char*) item; 1906 mUsed = len; 1907 mSize = len + 1; 1908 } else { 1909 mpBase = 0; 1910 mUsed = 0; 1911 mSize = 0; 1912 appendBytes(item, len); 1913 } 1914 } 1915 1916 String(const String& other) { 1917 mpBase = 0; 1918 mUsed = 0; 1919 mSize = 0; 1920 appendBytes(other.getUnwrapped(), other.len()); 1921 } 1922 1923 ~String() { 1924 if (mpBase) { 1925 free(mpBase); 1926 } 1927 } 1928 1929 String& operator=(const String& other) { 1930 clear(); 1931 appendBytes(other.getUnwrapped(), other.len()); 1932 return *this; 1933 } 1934 1935 inline char* getUnwrapped() const { 1936 return mpBase; 1937 } 1938 1939 void clear() { 1940 mUsed = 0; 1941 if (mSize > 0) { 1942 mpBase[0] = 0; 1943 } 1944 } 1945 1946 void appendCStr(const char* s) { 1947 appendBytes(s, strlen(s)); 1948 } 1949 1950 void appendBytes(const char* s, int n) { 1951 memcpy(ensure(n), s, n + 1); 1952 } 1953 1954 void append(char c) { 1955 * ensure(1) = c; 1956 } 1957 1958 void append(String& other) { 1959 appendBytes(other.getUnwrapped(), other.len()); 1960 } 1961 1962 char* orphan() { 1963 char* result = mpBase; 1964 mpBase = 0; 1965 mUsed = 0; 1966 mSize = 0; 1967 return result; 1968 } 1969 1970 void printf(const char* fmt,...) { 1971 va_list ap; 1972 va_start(ap, fmt); 1973 vprintf(fmt, ap); 1974 va_end(ap); 1975 } 1976 1977 void vprintf(const char* fmt, va_list ap) { 1978 char* temp; 1979 int numChars = vasprintf(&temp, fmt, ap); 1980 memcpy(ensure(numChars), temp, numChars+1); 1981 free(temp); 1982 } 1983 1984 inline size_t len() const { 1985 return mUsed; 1986 } 1987 1988 private: 1989 char* ensure(int n) { 1990 size_t newUsed = mUsed + n; 1991 if (newUsed > mSize) { 1992 size_t newSize = mSize * 2 + 10; 1993 if (newSize < newUsed) { 1994 newSize = newUsed; 1995 } 1996 mpBase = (char*) realloc(mpBase, newSize + 1); 1997 mSize = newSize; 1998 } 1999 mpBase[newUsed] = '\0'; 2000 char* result = mpBase + mUsed; 2001 mUsed = newUsed; 2002 return result; 2003 } 2004 2005 char* mpBase; 2006 size_t mUsed; 2007 size_t mSize; 2008 }; 2009 2010 void internKeywords() { 2011 // Note: order has to match TOK_ constants 2012 static const char* keywords[] = { 2013 "int", 2014 "char", 2015 "void", 2016 "if", 2017 "else", 2018 "while", 2019 "break", 2020 "return", 2021 "for", 2022 "pragma", 2023 "define", 2024 "auto", 2025 "case", 2026 "const", 2027 "continue", 2028 "default", 2029 "do", 2030 "double", 2031 "enum", 2032 "extern", 2033 "float", 2034 "goto", 2035 "long", 2036 "register", 2037 "short", 2038 "signed", 2039 "sizeof", 2040 "static", 2041 "struct", 2042 "switch", 2043 "typedef", 2044 "union", 2045 "unsigned", 2046 "volatile", 2047 "_Bool", 2048 "_Complex", 2049 "_Imaginary", 2050 "inline", 2051 "restrict", 2052 0}; 2053 2054 for(int i = 0; keywords[i]; i++) { 2055 mTokenTable.intern(keywords[i], strlen(keywords[i])); 2056 } 2057 } 2058 2059 struct InputState { 2060 InputStream* pStream; 2061 int oldCh; 2062 }; 2063 2064 struct VariableInfo { 2065 void* pAddress; 2066 void* pForward; // For a forward direction, linked list of data to fix up 2067 tokenid_t tok; 2068 size_t level; 2069 VariableInfo* pOldDefinition; 2070 Type* pType; 2071 }; 2072 2073 class SymbolStack { 2074 public: 2075 SymbolStack() { 2076 mpArena = 0; 2077 mpTokenTable = 0; 2078 } 2079 2080 void setArena(Arena* pArena) { 2081 mpArena = pArena; 2082 } 2083 2084 void setTokenTable(TokenTable* pTokenTable) { 2085 mpTokenTable = pTokenTable; 2086 } 2087 2088 void pushLevel() { 2089 Mark mark; 2090 mark.mArenaMark = mpArena->mark(); 2091 mark.mSymbolHead = mStack.size(); 2092 mLevelStack.push_back(mark); 2093 } 2094 2095 void popLevel() { 2096 // Undo any shadowing that was done: 2097 Mark mark = mLevelStack.back(); 2098 mLevelStack.pop_back(); 2099 while (mStack.size() > mark.mSymbolHead) { 2100 VariableInfo* pV = mStack.back(); 2101 mStack.pop_back(); 2102 (*mpTokenTable)[pV->tok].mpVariableInfo = pV->pOldDefinition; 2103 } 2104 mpArena->freeToMark(mark.mArenaMark); 2105 } 2106 2107 bool isDefinedAtCurrentLevel(tokenid_t tok) { 2108 VariableInfo* pV = (*mpTokenTable)[tok].mpVariableInfo; 2109 return pV && pV->level == level(); 2110 } 2111 2112 VariableInfo* add(tokenid_t tok) { 2113 Token& token = (*mpTokenTable)[tok]; 2114 VariableInfo* pOldV = token.mpVariableInfo; 2115 VariableInfo* pNewV = 2116 (VariableInfo*) mpArena->alloc(sizeof(VariableInfo)); 2117 memset(pNewV, 0, sizeof(VariableInfo)); 2118 pNewV->tok = tok; 2119 pNewV->level = level(); 2120 pNewV->pOldDefinition = pOldV; 2121 token.mpVariableInfo = pNewV; 2122 mStack.push_back(pNewV); 2123 return pNewV; 2124 } 2125 2126 VariableInfo* add(Type* pType) { 2127 VariableInfo* pVI = add(pType->id); 2128 pVI->pType = pType; 2129 return pVI; 2130 } 2131 2132 void forEach(bool (*fn)(VariableInfo*, void*), void* context) { 2133 for (size_t i = 0; i < mStack.size(); i++) { 2134 if (! fn(mStack[i], context)) { 2135 break; 2136 } 2137 } 2138 } 2139 2140 private: 2141 inline size_t level() { 2142 return mLevelStack.size(); 2143 } 2144 2145 struct Mark { 2146 Arena::Mark mArenaMark; 2147 size_t mSymbolHead; 2148 }; 2149 2150 Arena* mpArena; 2151 TokenTable* mpTokenTable; 2152 Vector<VariableInfo*> mStack; 2153 Vector<Mark> mLevelStack; 2154 }; 2155 2156 int ch; // Current input character, or EOF 2157 tokenid_t tok; // token 2158 intptr_t tokc; // token extra info 2159 double tokd; // floating point constant value 2160 int tokl; // token operator level 2161 intptr_t rsym; // return symbol 2162 Type* pReturnType; // type of the current function's return. 2163 intptr_t loc; // local variable index 2164 char* glo; // global variable index 2165 String mTokenString; 2166 char* dptr; // Macro state: Points to macro text during macro playback. 2167 int dch; // Macro state: Saves old value of ch during a macro playback. 2168 char* pGlobalBase; 2169 2170 // Arena for the duration of the compile 2171 Arena mGlobalArena; 2172 // Arena for data that's only needed when compiling a single function 2173 Arena mLocalArena; 2174 2175 TokenTable mTokenTable; 2176 SymbolStack mGlobals; 2177 SymbolStack mLocals; 2178 2179 // Prebuilt types, makes things slightly faster. 2180 Type* mkpInt; // int 2181 Type* mkpChar; // char 2182 Type* mkpVoid; // void 2183 Type* mkpFloat; 2184 Type* mkpDouble; 2185 Type* mkpIntFn; 2186 Type* mkpIntPtr; 2187 Type* mkpCharPtr; 2188 Type* mkpPtrIntFn; 2189 2190 InputStream* file; 2191 2192 CodeBuf codeBuf; 2193 CodeGenerator* pGen; 2194 2195 String mErrorBuf; 2196 2197 String mPragmas; 2198 int mPragmaStringCount; 2199 2200 static const int ALLOC_SIZE = 99999; 2201 2202 static const int TOK_DUMMY = 1; 2203 static const int TOK_NUM = 2; 2204 static const int TOK_NUM_FLOAT = 3; 2205 static const int TOK_NUM_DOUBLE = 4; 2206 2207 // 3..255 are character and/or operators 2208 2209 // Keywords start at 0x100 and increase by 1 2210 // Order has to match string list in "internKeywords". 2211 enum { 2212 TOK_KEYWORD = TokenTable::TOKEN_BASE, 2213 TOK_INT = TOK_KEYWORD, 2214 TOK_CHAR, 2215 TOK_VOID, 2216 TOK_IF, 2217 TOK_ELSE, 2218 TOK_WHILE, 2219 TOK_BREAK, 2220 TOK_RETURN, 2221 TOK_FOR, 2222 TOK_PRAGMA, 2223 TOK_DEFINE, 2224 TOK_AUTO, 2225 TOK_CASE, 2226 TOK_CONST, 2227 TOK_CONTINUE, 2228 TOK_DEFAULT, 2229 TOK_DO, 2230 TOK_DOUBLE, 2231 TOK_ENUM, 2232 TOK_EXTERN, 2233 TOK_FLOAT, 2234 TOK_GOTO, 2235 TOK_LONG, 2236 TOK_REGISTER, 2237 TOK_SHORT, 2238 TOK_SIGNED, 2239 TOK_SIZEOF, 2240 TOK_STATIC, 2241 TOK_STRUCT, 2242 TOK_SWITCH, 2243 TOK_TYPEDEF, 2244 TOK_UNION, 2245 TOK_UNSIGNED, 2246 TOK_VOLATILE, 2247 TOK__BOOL, 2248 TOK__COMPLEX, 2249 TOK__IMAGINARY, 2250 TOK_INLINE, 2251 TOK_RESTRICT, 2252 // Symbols start after tokens 2253 TOK_SYMBOL 2254 }; 2255 2256 static const int LOCAL = 0x200; 2257 2258 static const int SYM_FORWARD = 0; 2259 static const int SYM_DEFINE = 1; 2260 2261 /* tokens in string heap */ 2262 static const int TAG_TOK = ' '; 2263 2264 static const int OP_INCREMENT = 0; 2265 static const int OP_DECREMENT = 1; 2266 static const int OP_MUL = 2; 2267 static const int OP_DIV = 3; 2268 static const int OP_MOD = 4; 2269 static const int OP_PLUS = 5; 2270 static const int OP_MINUS = 6; 2271 static const int OP_SHIFT_LEFT = 7; 2272 static const int OP_SHIFT_RIGHT = 8; 2273 static const int OP_LESS_EQUAL = 9; 2274 static const int OP_GREATER_EQUAL = 10; 2275 static const int OP_LESS = 11; 2276 static const int OP_GREATER = 12; 2277 static const int OP_EQUALS = 13; 2278 static const int OP_NOT_EQUALS = 14; 2279 static const int OP_LOGICAL_AND = 15; 2280 static const int OP_LOGICAL_OR = 16; 2281 static const int OP_BIT_AND = 17; 2282 static const int OP_BIT_XOR = 18; 2283 static const int OP_BIT_OR = 19; 2284 static const int OP_BIT_NOT = 20; 2285 static const int OP_LOGICAL_NOT = 21; 2286 static const int OP_COUNT = 22; 2287 2288 /* Operators are searched from front, the two-character operators appear 2289 * before the single-character operators with the same first character. 2290 * @ is used to pad out single-character operators. 2291 */ 2292 static const char* operatorChars; 2293 static const char operatorLevel[]; 2294 2295 /* Called when we detect an internal problem. Does nothing in production. 2296 * 2297 */ 2298 void internalError() { 2299 * (char*) 0 = 0; 2300 } 2301 2302 void assert(bool isTrue) { 2303 if (!isTrue) { 2304 internalError(); 2305 } 2306 } 2307 2308 bool isSymbol(tokenid_t t) { 2309 return t >= TOK_SYMBOL && 2310 ((size_t) (t-TOK_SYMBOL)) < mTokenTable.size(); 2311 } 2312 2313 bool isSymbolOrKeyword(tokenid_t t) { 2314 return t >= TOK_KEYWORD && 2315 ((size_t) (t-TOK_KEYWORD)) < mTokenTable.size(); 2316 } 2317 2318 VariableInfo* VI(tokenid_t t) { 2319 assert(isSymbol(t)); 2320 VariableInfo* pV = mTokenTable[t].mpVariableInfo; 2321 if (pV && pV->tok != t) { 2322 internalError(); 2323 } 2324 return pV; 2325 } 2326 2327 inline bool isDefined(tokenid_t t) { 2328 return t >= TOK_SYMBOL && VI(t) != 0; 2329 } 2330 2331 const char* nameof(tokenid_t t) { 2332 assert(isSymbolOrKeyword(t)); 2333 return mTokenTable[t].pText; 2334 } 2335 2336 void pdef(int t) { 2337 mTokenString.append(t); 2338 } 2339 2340 void inp() { 2341 if (dptr) { 2342 ch = *dptr++; 2343 if (ch == 0) { 2344 dptr = 0; 2345 ch = dch; 2346 } 2347 } else 2348 ch = file->getChar(); 2349#if 0 2350 printf("ch='%c' 0x%x\n", ch, ch); 2351#endif 2352 } 2353 2354 int isid() { 2355 return isalnum(ch) | (ch == '_'); 2356 } 2357 2358 /* read a character constant, advances ch to after end of constant */ 2359 int getq() { 2360 int val = ch; 2361 if (ch == '\\') { 2362 inp(); 2363 if (isoctal(ch)) { 2364 // 1 to 3 octal characters. 2365 val = 0; 2366 for(int i = 0; i < 3; i++) { 2367 if (isoctal(ch)) { 2368 val = (val << 3) + ch - '0'; 2369 inp(); 2370 } 2371 } 2372 return val; 2373 } else if (ch == 'x' || ch == 'X') { 2374 // N hex chars 2375 inp(); 2376 if (! isxdigit(ch)) { 2377 error("'x' character escape requires at least one digit."); 2378 } else { 2379 val = 0; 2380 while (isxdigit(ch)) { 2381 int d = ch; 2382 if (isdigit(d)) { 2383 d -= '0'; 2384 } else if (d <= 'F') { 2385 d = d - 'A' + 10; 2386 } else { 2387 d = d - 'a' + 10; 2388 } 2389 val = (val << 4) + d; 2390 inp(); 2391 } 2392 } 2393 } else { 2394 int val = ch; 2395 switch (ch) { 2396 case 'a': 2397 val = '\a'; 2398 break; 2399 case 'b': 2400 val = '\b'; 2401 break; 2402 case 'f': 2403 val = '\f'; 2404 break; 2405 case 'n': 2406 val = '\n'; 2407 break; 2408 case 'r': 2409 val = '\r'; 2410 break; 2411 case 't': 2412 val = '\t'; 2413 break; 2414 case 'v': 2415 val = '\v'; 2416 break; 2417 case '\\': 2418 val = '\\'; 2419 break; 2420 case '\'': 2421 val = '\''; 2422 break; 2423 case '"': 2424 val = '"'; 2425 break; 2426 case '?': 2427 val = '?'; 2428 break; 2429 default: 2430 error("Undefined character escape %c", ch); 2431 break; 2432 } 2433 inp(); 2434 return val; 2435 } 2436 } else { 2437 inp(); 2438 } 2439 return val; 2440 } 2441 2442 static bool isoctal(int ch) { 2443 return ch >= '0' && ch <= '7'; 2444 } 2445 2446 bool acceptCh(int c) { 2447 bool result = c == ch; 2448 if (result) { 2449 pdef(ch); 2450 inp(); 2451 } 2452 return result; 2453 } 2454 2455 bool acceptDigitsCh() { 2456 bool result = false; 2457 while (isdigit(ch)) { 2458 result = true; 2459 pdef(ch); 2460 inp(); 2461 } 2462 return result; 2463 } 2464 2465 void parseFloat() { 2466 tok = TOK_NUM_DOUBLE; 2467 // mTokenString already has the integral part of the number. 2468 acceptCh('.'); 2469 acceptDigitsCh(); 2470 bool doExp = true; 2471 if (acceptCh('e') || acceptCh('E')) { 2472 // Don't need to do any extra work 2473 } else if (ch == 'f' || ch == 'F') { 2474 pdef('e'); // So it can be parsed by strtof. 2475 inp(); 2476 tok = TOK_NUM_FLOAT; 2477 } else { 2478 doExp = false; 2479 } 2480 if (doExp) { 2481 bool digitsRequired = acceptCh('-'); 2482 bool digitsFound = acceptDigitsCh(); 2483 if (digitsRequired && ! digitsFound) { 2484 error("malformed exponent"); 2485 } 2486 } 2487 char* pText = mTokenString.getUnwrapped(); 2488 if (tok == TOK_NUM_FLOAT) { 2489 tokd = strtof(pText, 0); 2490 } else { 2491 tokd = strtod(pText, 0); 2492 } 2493 //fprintf(stderr, "float constant: %s (%d) %g\n", pText, tok, tokd); 2494 } 2495 2496 void next() { 2497 int l, a; 2498 2499 while (isspace(ch) | (ch == '#')) { 2500 if (ch == '#') { 2501 inp(); 2502 next(); 2503 if (tok == TOK_DEFINE) { 2504 doDefine(); 2505 } else if (tok == TOK_PRAGMA) { 2506 doPragma(); 2507 } else { 2508 error("Unsupported preprocessor directive \"%s\"", 2509 mTokenString.getUnwrapped()); 2510 } 2511 } 2512 inp(); 2513 } 2514 tokl = 0; 2515 tok = ch; 2516 /* encode identifiers & numbers */ 2517 if (isid()) { 2518 mTokenString.clear(); 2519 while (isid()) { 2520 pdef(ch); 2521 inp(); 2522 } 2523 if (isdigit(tok)) { 2524 // Start of a numeric constant. Could be integer, float, or 2525 // double, won't know until we look further. 2526 if (ch == '.' || ch == 'e' || ch == 'e' 2527 || ch == 'f' || ch == 'F') { 2528 parseFloat(); 2529 } else { 2530 // It's an integer constant 2531 tokc = strtol(mTokenString.getUnwrapped(), 0, 0); 2532 tok = TOK_NUM; 2533 } 2534 } else { 2535 tok = mTokenTable.intern(mTokenString.getUnwrapped(), 2536 mTokenString.len()); 2537 // Is this a macro? 2538 char* pMacroDefinition = mTokenTable[tok].mpMacroDefinition; 2539 if(pMacroDefinition) { 2540 // Yes, it is a macro 2541 dptr = pMacroDefinition; 2542 dch = ch; 2543 inp(); 2544 next(); 2545 } 2546 } 2547 } else { 2548 inp(); 2549 if (tok == '\'') { 2550 tok = TOK_NUM; 2551 tokc = getq(); 2552 if (ch != '\'') { 2553 error("Expected a ' character, got %c", ch); 2554 } else { 2555 inp(); 2556 } 2557 } else if ((tok == '/') & (ch == '*')) { 2558 inp(); 2559 while (ch && ch != EOF) { 2560 while (ch != '*' && ch != EOF) 2561 inp(); 2562 inp(); 2563 if (ch == '/') 2564 ch = 0; 2565 } 2566 if (ch == EOF) { 2567 error("End of file inside comment."); 2568 } 2569 inp(); 2570 next(); 2571 } else if ((tok == '/') & (ch == '/')) { 2572 inp(); 2573 while (ch && (ch != '\n') && (ch != EOF)) { 2574 inp(); 2575 } 2576 inp(); 2577 next(); 2578 } else { 2579 const char* t = operatorChars; 2580 int opIndex = 0; 2581 while ((l = *t++) != 0) { 2582 a = *t++; 2583 tokl = operatorLevel[opIndex]; 2584 tokc = opIndex; 2585 if ((l == tok) & ((a == ch) | (a == '@'))) { 2586#if 0 2587 printf("%c%c -> tokl=%d tokc=0x%x\n", 2588 l, a, tokl, tokc); 2589#endif 2590 if (a == ch) { 2591 inp(); 2592 tok = TOK_DUMMY; /* dummy token for double tokens */ 2593 } 2594 break; 2595 } 2596 opIndex++; 2597 } 2598 if (l == 0) { 2599 tokl = 0; 2600 tokc = 0; 2601 } 2602 } 2603 } 2604#if 0 2605 { 2606 String buf; 2607 decodeToken(buf, tok); 2608 fprintf(stderr, "%s\n", buf.getUnwrapped()); 2609 } 2610#endif 2611 } 2612 2613 void doDefine() { 2614 next(); 2615 tokenid_t name = tok; 2616 String* pName = new String(); 2617 while (isspace(ch)) { 2618 inp(); 2619 } 2620 if (ch == '(') { 2621 delete pName; 2622 error("Defines with arguments not supported"); 2623 return; 2624 } 2625 while (isspace(ch)) { 2626 inp(); 2627 } 2628 String value; 2629 while (ch != '\n' && ch != EOF) { 2630 value.append(ch); 2631 inp(); 2632 } 2633 char* pDefn = (char*)mGlobalArena.alloc(value.len() + 1); 2634 memcpy(pDefn, value.getUnwrapped(), value.len()); 2635 pDefn[value.len()] = 0; 2636 mTokenTable[name].mpMacroDefinition = pDefn; 2637 } 2638 2639 void doPragma() { 2640 // # pragma name(val) 2641 int state = 0; 2642 while(ch != EOF && ch != '\n' && state < 10) { 2643 switch(state) { 2644 case 0: 2645 if (isspace(ch)) { 2646 inp(); 2647 } else { 2648 state++; 2649 } 2650 break; 2651 case 1: 2652 if (isalnum(ch)) { 2653 mPragmas.append(ch); 2654 inp(); 2655 } else if (ch == '(') { 2656 mPragmas.append(0); 2657 inp(); 2658 state++; 2659 } else { 2660 state = 11; 2661 } 2662 break; 2663 case 2: 2664 if (isalnum(ch)) { 2665 mPragmas.append(ch); 2666 inp(); 2667 } else if (ch == ')') { 2668 mPragmas.append(0); 2669 inp(); 2670 state = 10; 2671 } else { 2672 state = 11; 2673 } 2674 break; 2675 } 2676 } 2677 if(state != 10) { 2678 error("Unexpected pragma syntax"); 2679 } 2680 mPragmaStringCount += 2; 2681 } 2682 2683 virtual void verror(const char* fmt, va_list ap) { 2684 mErrorBuf.printf("%ld: ", file->getLine()); 2685 mErrorBuf.vprintf(fmt, ap); 2686 mErrorBuf.printf("\n"); 2687 } 2688 2689 void skip(intptr_t c) { 2690 if (tok != c) { 2691 error("'%c' expected", c); 2692 } 2693 next(); 2694 } 2695 2696 bool accept(intptr_t c) { 2697 if (tok == c) { 2698 next(); 2699 return true; 2700 } 2701 return false; 2702 } 2703 2704 bool acceptStringLiteral() { 2705 if (tok == '"') { 2706 pGen->li((int) glo, mkpCharPtr); 2707 // This while loop merges multiple adjacent string constants. 2708 while (tok == '"') { 2709 while (ch != '"' && ch != EOF) { 2710 *allocGlobalSpace(1) = getq(); 2711 } 2712 if (ch != '"') { 2713 error("Unterminated string constant."); 2714 } 2715 inp(); 2716 next(); 2717 } 2718 /* Null terminate */ 2719 *glo = 0; 2720 /* align heap */ 2721 allocGlobalSpace((char*) (((intptr_t) glo + 4) & -4) - glo); 2722 2723 return true; 2724 } 2725 return false; 2726 } 2727 /* Parse and evaluate a unary expression. 2728 * allowAssignment is true if '=' parsing wanted (quick hack) 2729 */ 2730 void unary(bool allowAssignment) { 2731 intptr_t n, t, a; 2732 t = 0; 2733 n = 1; /* type of expression 0 = forward, 1 = value, other = lvalue */ 2734 if (acceptStringLiteral()) { 2735 // Nothing else to do. 2736 } else { 2737 int c = tokl; 2738 a = tokc; 2739 double ad = tokd; 2740 t = tok; 2741 next(); 2742 if (t == TOK_NUM) { 2743 pGen->li(a, mkpInt); 2744 } else if (t == TOK_NUM_FLOAT) { 2745 pGen->lif(ad, mkpFloat); 2746 } else if (t == TOK_NUM_DOUBLE) { 2747 pGen->lid(ad, mkpDouble); 2748 } else if (c == 2) { 2749 /* -, +, !, ~ */ 2750 unary(false); 2751 if (t == '!') 2752 pGen->gUnaryCmp(a); 2753 else 2754 pGen->genUnaryOp(a); 2755 } else if (t == '(') { 2756 expr(); 2757 skip(')'); 2758 } else if (t == '*') { 2759 /* This is a pointer dereference, but we currently only 2760 * support a pointer dereference if it's immediately 2761 * in front of a cast. So parse the cast right here. 2762 */ 2763 skip('('); 2764 Type* pCast = expectCastTypeDeclaration(mLocalArena); 2765 // We currently only handle 3 types of cast: 2766 // (int*), (char*) , (int (*)()) 2767 if(typeEqual(pCast, mkpIntPtr)) { 2768 t = TOK_INT; 2769 } else if (typeEqual(pCast, mkpCharPtr)) { 2770 t = TOK_CHAR; 2771 } else if (typeEqual(pCast, mkpPtrIntFn)){ 2772 t = 0; 2773 } else { 2774 String buffer; 2775 decodeType(buffer, pCast); 2776 error("Unsupported cast type %s", buffer.getUnwrapped()); 2777 decodeType(buffer, mkpPtrIntFn); 2778 } 2779 skip(')'); 2780 unary(false); 2781 if (accept('=')) { 2782 pGen->pushR0(); 2783 expr(); 2784 pGen->storeR0ToTOS(pCast); 2785 } else if (t) { 2786 pGen->loadR0FromR0(pCast); 2787 } 2788 // Else we fall through to the function call below, with 2789 // t == 0 to trigger an indirect function call. Hack! 2790 } else if (t == '&') { 2791 VariableInfo* pVI = VI(tok); 2792 pGen->leaR0((int) pVI->pAddress, 2793 createPtrType(pVI->pType, mLocalArena)); 2794 next(); 2795 } else if (t == EOF ) { 2796 error("Unexpected EOF."); 2797 } else if (!checkSymbol(t)) { 2798 // Don't have to do anything special here, the error 2799 // message was printed by checkSymbol() above. 2800 } else { 2801 if (!isDefined(t)) { 2802 mGlobals.add(t); 2803 // printf("Adding new global function %s\n", nameof(t)); 2804 } 2805 VariableInfo* pVI = VI(t); 2806 n = (intptr_t) pVI->pAddress; 2807 /* forward reference: try dlsym */ 2808 if (!n) { 2809 n = (intptr_t) dlsym(RTLD_DEFAULT, nameof(t)); 2810 pVI->pType = mkpIntFn; 2811 pVI->pAddress = (void*) n; 2812 } 2813 if ((tok == '=') & allowAssignment) { 2814 /* assignment */ 2815 next(); 2816 expr(); 2817 pGen->storeR0(n); 2818 } else if (tok != '(') { 2819 /* variable */ 2820 if (!n) { 2821 error("Undefined variable %s", nameof(t)); 2822 } 2823 pGen->loadR0(n, tokl == 11, tokc, pVI->pType); 2824 if (tokl == 11) { 2825 next(); 2826 } 2827 } 2828 } 2829 } 2830 2831 /* function call */ 2832 if (accept('(')) { 2833 if (n == 1) 2834 pGen->pushR0(); 2835 2836 /* push args and invert order */ 2837 a = pGen->beginFunctionCallArguments(); 2838 int l = 0; 2839 while (tok != ')' && tok != EOF) { 2840 expr(); 2841 pGen->storeR0ToArg(l); 2842 l = l + 4; 2843 if (accept(',')) { 2844 // fine 2845 } else if ( tok != ')') { 2846 error("Expected ',' or ')'"); 2847 } 2848 } 2849 pGen->endFunctionCallArguments(a, l); 2850 skip(')'); 2851 if (!n) { 2852 /* forward reference */ 2853 VariableInfo* pVI = VI(t); 2854 pVI->pForward = (void*) pGen->callForward((int) pVI->pForward, 2855 pVI->pType); 2856 } else if (n == 1) { 2857 pGen->callIndirect(l, mkpPtrIntFn->pHead); 2858 } else { 2859 pGen->callRelative(n - codeBuf.getPC() - pGen->jumpOffset(), 2860 VI(t)->pType); 2861 } 2862 pGen->adjustStackAfterCall(l, n == 1); 2863 } 2864 } 2865 2866 /* Recursive descent parser for binary operations. 2867 */ 2868 void binaryOp(int level) { 2869 intptr_t t, n, a; 2870 t = 0; 2871 if (level-- == 1) 2872 unary(true); 2873 else { 2874 binaryOp(level); 2875 a = 0; 2876 while (level == tokl) { 2877 n = tok; 2878 t = tokc; 2879 next(); 2880 2881 if (level > 8) { 2882 a = pGen->gtst(t == OP_LOGICAL_OR, a); /* && and || output code generation */ 2883 binaryOp(level); 2884 } else { 2885 pGen->pushR0(); 2886 binaryOp(level); 2887 2888 if ((level == 4) | (level == 5)) { 2889 pGen->gcmp(t); 2890 } else { 2891 pGen->genOp(t); 2892 } 2893 } 2894 } 2895 /* && and || output code generation */ 2896 if (a && level > 8) { 2897 a = pGen->gtst(t == OP_LOGICAL_OR, a); 2898 pGen->li(t != OP_LOGICAL_OR, mkpInt); 2899 pGen->gjmp(5); /* jmp $ + 5 (sizeof li, FIXME for ARM) */ 2900 pGen->gsym(a); 2901 pGen->li(t == OP_LOGICAL_OR, mkpInt); 2902 } 2903 } 2904 } 2905 2906 void expr() { 2907 binaryOp(11); 2908 } 2909 2910 int test_expr() { 2911 expr(); 2912 return pGen->gtst(0, 0); 2913 } 2914 2915 void block(intptr_t l, bool outermostFunctionBlock) { 2916 intptr_t a, n, t; 2917 2918 Type* pBaseType; 2919 if ((pBaseType = acceptPrimitiveType(mLocalArena))) { 2920 /* declarations */ 2921 localDeclarations(pBaseType); 2922 } else if (tok == TOK_IF) { 2923 next(); 2924 skip('('); 2925 a = test_expr(); 2926 skip(')'); 2927 block(l, false); 2928 if (tok == TOK_ELSE) { 2929 next(); 2930 n = pGen->gjmp(0); /* jmp */ 2931 pGen->gsym(a); 2932 block(l, false); 2933 pGen->gsym(n); /* patch else jmp */ 2934 } else { 2935 pGen->gsym(a); /* patch if test */ 2936 } 2937 } else if ((tok == TOK_WHILE) | (tok == TOK_FOR)) { 2938 t = tok; 2939 next(); 2940 skip('('); 2941 if (t == TOK_WHILE) { 2942 n = codeBuf.getPC(); // top of loop, target of "next" iteration 2943 a = test_expr(); 2944 } else { 2945 if (tok != ';') 2946 expr(); 2947 skip(';'); 2948 n = codeBuf.getPC(); 2949 a = 0; 2950 if (tok != ';') 2951 a = test_expr(); 2952 skip(';'); 2953 if (tok != ')') { 2954 t = pGen->gjmp(0); 2955 expr(); 2956 pGen->gjmp(n - codeBuf.getPC() - pGen->jumpOffset()); 2957 pGen->gsym(t); 2958 n = t + 4; 2959 } 2960 } 2961 skip(')'); 2962 block((intptr_t) &a, false); 2963 pGen->gjmp(n - codeBuf.getPC() - pGen->jumpOffset()); /* jmp */ 2964 pGen->gsym(a); 2965 } else if (tok == '{') { 2966 if (! outermostFunctionBlock) { 2967 mLocals.pushLevel(); 2968 } 2969 next(); 2970 while (tok != '}' && tok != EOF) 2971 block(l, false); 2972 skip('}'); 2973 if (! outermostFunctionBlock) { 2974 mLocals.popLevel(); 2975 } 2976 } else { 2977 if (accept(TOK_RETURN)) { 2978 if (tok != ';') { 2979 expr(); 2980 pGen->convertR0(pReturnType); 2981 } 2982 rsym = pGen->gjmp(rsym); /* jmp */ 2983 } else if (accept(TOK_BREAK)) { 2984 *(int *) l = pGen->gjmp(*(int *) l); 2985 } else if (tok != ';') 2986 expr(); 2987 skip(';'); 2988 } 2989 } 2990 2991 bool typeEqual(Type* a, Type* b) { 2992 if (a == b) { 2993 return true; 2994 } 2995 if (a == NULL || b == NULL) { 2996 return false; 2997 } 2998 TypeTag at = a->tag; 2999 if (at != b->tag) { 3000 return false; 3001 } 3002 if (at == TY_POINTER) { 3003 return typeEqual(a->pHead, b->pHead); 3004 } else if (at == TY_FUNC || at == TY_PARAM) { 3005 return typeEqual(a->pHead, b->pHead) 3006 && typeEqual(a->pTail, b->pTail); 3007 } 3008 return true; 3009 } 3010 3011 Type* createType(TypeTag tag, Type* pHead, Type* pTail, Arena& arena) { 3012 assert(tag >= TY_INT && tag <= TY_PARAM); 3013 Type* pType = (Type*) arena.alloc(sizeof(Type)); 3014 memset(pType, 0, sizeof(*pType)); 3015 pType->tag = tag; 3016 pType->pHead = pHead; 3017 pType->pTail = pTail; 3018 return pType; 3019 } 3020 3021 Type* createPtrType(Type* pType, Arena& arena) { 3022 return createType(TY_POINTER, pType, NULL, arena); 3023 } 3024 3025 /** 3026 * Try to print a type in declaration order 3027 */ 3028 void decodeType(String& buffer, Type* pType) { 3029 buffer.clear(); 3030 if (pType == NULL) { 3031 buffer.appendCStr("null"); 3032 return; 3033 } 3034 decodeTypeImp(buffer, pType); 3035 } 3036 3037 void decodeTypeImp(String& buffer, Type* pType) { 3038 decodeTypeImpPrefix(buffer, pType); 3039 3040 String temp; 3041 if (pType->id != 0) { 3042 decodeToken(temp, pType->id); 3043 buffer.append(temp); 3044 } 3045 3046 decodeTypeImpPostfix(buffer, pType); 3047 } 3048 3049 void decodeTypeImpPrefix(String& buffer, Type* pType) { 3050 TypeTag tag = pType->tag; 3051 3052 if (tag >= TY_INT && tag <= TY_VOID) { 3053 switch (tag) { 3054 case TY_INT: 3055 buffer.appendCStr("int"); 3056 break; 3057 case TY_CHAR: 3058 buffer.appendCStr("char"); 3059 break; 3060 case TY_VOID: 3061 buffer.appendCStr("void"); 3062 break; 3063 case TY_FLOAT: 3064 buffer.appendCStr("float"); 3065 break; 3066 case TY_DOUBLE: 3067 buffer.appendCStr("double"); 3068 break; 3069 default: 3070 break; 3071 } 3072 buffer.append(' '); 3073 } 3074 3075 switch (tag) { 3076 case TY_INT: 3077 break; 3078 case TY_CHAR: 3079 break; 3080 case TY_VOID: 3081 break; 3082 case TY_FLOAT: 3083 break; 3084 case TY_DOUBLE: 3085 break; 3086 case TY_POINTER: 3087 decodeTypeImpPrefix(buffer, pType->pHead); 3088 if(pType->pHead && pType->pHead->tag == TY_FUNC) { 3089 buffer.append('('); 3090 } 3091 buffer.append('*'); 3092 break; 3093 case TY_FUNC: 3094 decodeTypeImp(buffer, pType->pHead); 3095 break; 3096 case TY_PARAM: 3097 decodeTypeImp(buffer, pType->pHead); 3098 break; 3099 default: 3100 String temp; 3101 temp.printf("Unknown tag %d", pType->tag); 3102 buffer.append(temp); 3103 break; 3104 } 3105 } 3106 3107 void decodeTypeImpPostfix(String& buffer, Type* pType) { 3108 TypeTag tag = pType->tag; 3109 3110 switch(tag) { 3111 case TY_POINTER: 3112 if(pType->pHead && pType->pHead->tag == TY_FUNC) { 3113 buffer.append(')'); 3114 } 3115 decodeTypeImpPostfix(buffer, pType->pHead); 3116 break; 3117 case TY_FUNC: 3118 buffer.append('('); 3119 for(Type* pArg = pType->pTail; pArg; pArg = pArg->pTail) { 3120 decodeTypeImp(buffer, pArg); 3121 if (pArg->pTail) { 3122 buffer.appendCStr(", "); 3123 } 3124 } 3125 buffer.append(')'); 3126 break; 3127 default: 3128 break; 3129 } 3130 } 3131 3132 void printType(Type* pType) { 3133 String buffer; 3134 decodeType(buffer, pType); 3135 fprintf(stderr, "%s\n", buffer.getUnwrapped()); 3136 } 3137 3138 Type* acceptPrimitiveType(Arena& arena) { 3139 Type* pType; 3140 if (tok == TOK_INT) { 3141 pType = mkpInt; 3142 } else if (tok == TOK_CHAR) { 3143 pType = mkpChar; 3144 } else if (tok == TOK_VOID) { 3145 pType = mkpVoid; 3146 } else if (tok == TOK_FLOAT) { 3147 pType = mkpFloat; 3148 } else if (tok == TOK_DOUBLE) { 3149 pType = mkpDouble; 3150 } else { 3151 return NULL; 3152 } 3153 next(); 3154 return pType; 3155 } 3156 3157 Type* acceptDeclaration(Type* pType, bool nameAllowed, bool nameRequired, 3158 Arena& arena) { 3159 tokenid_t declName = 0; 3160 pType = acceptDecl2(pType, declName, nameAllowed, 3161 nameRequired, arena); 3162 if (declName) { 3163 // Clone the parent type so we can set a unique ID 3164 pType = createType(pType->tag, pType->pHead, 3165 pType->pTail, arena); 3166 3167 pType->id = declName; 3168 } 3169 // fprintf(stderr, "Parsed a declaration: "); 3170 // printType(pType); 3171 return pType; 3172 } 3173 3174 Type* expectDeclaration(Type* pBaseType, Arena& arena) { 3175 Type* pType = acceptDeclaration(pBaseType, true, true, arena); 3176 if (! pType) { 3177 error("Expected a declaration"); 3178 } 3179 return pType; 3180 } 3181 3182 /* Used for accepting types that appear in casts */ 3183 Type* acceptCastTypeDeclaration(Arena& arena) { 3184 Type* pType = acceptPrimitiveType(arena); 3185 if (pType) { 3186 pType = acceptDeclaration(pType, false, false, arena); 3187 } 3188 return pType; 3189 } 3190 3191 Type* expectCastTypeDeclaration(Arena& arena) { 3192 Type* pType = acceptCastTypeDeclaration(arena); 3193 if (! pType) { 3194 error("Expected a declaration"); 3195 } 3196 return pType; 3197 } 3198 3199 Type* acceptDecl2(Type* pType, tokenid_t& declName, 3200 bool nameAllowed, bool nameRequired, Arena& arena) { 3201 int ptrCounter = 0; 3202 while (accept('*')) { 3203 ptrCounter++; 3204 } 3205 pType = acceptDecl3(pType, declName, nameAllowed, nameRequired, arena); 3206 while (ptrCounter-- > 0) { 3207 pType = createType(TY_POINTER, pType, NULL, arena); 3208 } 3209 return pType; 3210 } 3211 3212 Type* acceptDecl3(Type* pType, tokenid_t& declName, 3213 bool nameAllowed, bool nameRequired, Arena& arena) { 3214 // direct-dcl : 3215 // name 3216 // (dcl) 3217 // direct-dcl() 3218 // direct-dcl[] 3219 Type* pNewHead = NULL; 3220 if (accept('(')) { 3221 pNewHead = acceptDecl2(pNewHead, declName, nameAllowed, 3222 nameRequired, arena); 3223 skip(')'); 3224 } else if ((declName = acceptSymbol()) != 0) { 3225 if (nameAllowed == false && declName) { 3226 error("Symbol %s not allowed here", nameof(declName)); 3227 } else if (nameRequired && ! declName) { 3228 String temp; 3229 decodeToken(temp, tok); 3230 error("Expected symbol. Got %s", temp.getUnwrapped()); 3231 } 3232 } 3233 while (accept('(')) { 3234 // Function declaration 3235 Type* pTail = acceptArgs(nameAllowed, arena); 3236 pType = createType(TY_FUNC, pType, pTail, arena); 3237 skip(')'); 3238 } 3239 3240 if (pNewHead) { 3241 Type* pA = pNewHead; 3242 while (pA->pHead) { 3243 pA = pA->pHead; 3244 } 3245 pA->pHead = pType; 3246 pType = pNewHead; 3247 } 3248 return pType; 3249 } 3250 3251 Type* acceptArgs(bool nameAllowed, Arena& arena) { 3252 Type* pHead = NULL; 3253 Type* pTail = NULL; 3254 for(;;) { 3255 Type* pBaseArg = acceptPrimitiveType(arena); 3256 if (pBaseArg) { 3257 Type* pArg = acceptDeclaration(pBaseArg, nameAllowed, false, 3258 arena); 3259 if (pArg) { 3260 Type* pParam = createType(TY_PARAM, pArg, NULL, arena); 3261 if (!pHead) { 3262 pHead = pParam; 3263 pTail = pParam; 3264 } else { 3265 pTail->pTail = pParam; 3266 pTail = pParam; 3267 } 3268 } 3269 } 3270 if (! accept(',')) { 3271 break; 3272 } 3273 } 3274 return pHead; 3275 } 3276 3277 Type* expectPrimitiveType(Arena& arena) { 3278 Type* pType = acceptPrimitiveType(arena); 3279 if (!pType) { 3280 String buf; 3281 decodeToken(buf, tok); 3282 error("Expected a type, got %s", buf.getUnwrapped()); 3283 } 3284 return pType; 3285 } 3286 3287 void addGlobalSymbol(Type* pDecl) { 3288 tokenid_t t = pDecl->id; 3289 VariableInfo* pVI = VI(t); 3290 if(pVI && pVI->pAddress) { 3291 reportDuplicate(t); 3292 } 3293 mGlobals.add(pDecl); 3294 } 3295 3296 void reportDuplicate(tokenid_t t) { 3297 error("Duplicate definition of %s", nameof(t)); 3298 } 3299 3300 void addLocalSymbol(Type* pDecl) { 3301 tokenid_t t = pDecl->id; 3302 if (mLocals.isDefinedAtCurrentLevel(t)) { 3303 reportDuplicate(t); 3304 } 3305 mLocals.add(pDecl); 3306 } 3307 3308 void localDeclarations(Type* pBaseType) { 3309 intptr_t a; 3310 3311 while (pBaseType) { 3312 while (tok != ';' && tok != EOF) { 3313 Type* pDecl = expectDeclaration(pBaseType, mLocalArena); 3314 if (!pDecl) { 3315 break; 3316 } 3317 int variableAddress = 0; 3318 addLocalSymbol(pDecl); 3319 loc = loc + pGen->sizeOf(pDecl); 3320 loc = loc + 4; 3321 variableAddress = -loc; 3322 VI(pDecl->id)->pAddress = (void*) variableAddress; 3323 if (accept('=')) { 3324 /* assignment */ 3325 expr(); 3326 pGen->storeR0(variableAddress); 3327 } 3328 if (tok == ',') 3329 next(); 3330 } 3331 skip(';'); 3332 pBaseType = acceptPrimitiveType(mLocalArena); 3333 } 3334 } 3335 3336 bool checkSymbol() { 3337 return checkSymbol(tok); 3338 } 3339 3340 void decodeToken(String& buffer, tokenid_t token) { 3341 if (token == EOF ) { 3342 buffer.printf("EOF"); 3343 } else if (token == TOK_NUM) { 3344 buffer.printf("numeric constant"); 3345 } else if (token >= 0 && token < 256) { 3346 if (token < 32) { 3347 buffer.printf("'\\x%02x'", token); 3348 } else { 3349 buffer.printf("'%c'", token); 3350 } 3351 } else if (token >= TOK_KEYWORD && token < TOK_SYMBOL) { 3352 buffer.printf("keyword \"%s\"", nameof(token)); 3353 } else { 3354 buffer.printf("symbol \"%s\"", nameof(token)); 3355 } 3356 } 3357 3358 bool checkSymbol(tokenid_t token) { 3359 bool result = token >= TOK_SYMBOL; 3360 if (!result) { 3361 String temp; 3362 decodeToken(temp, token); 3363 error("Expected symbol. Got %s", temp.getUnwrapped()); 3364 } 3365 return result; 3366 } 3367 3368 tokenid_t acceptSymbol() { 3369 tokenid_t result = 0; 3370 if (tok >= TOK_SYMBOL) { 3371 result = tok; 3372 next(); 3373 } 3374 return result; 3375 } 3376 3377 void globalDeclarations() { 3378 while (tok != EOF) { 3379 Type* pBaseType = expectPrimitiveType(mGlobalArena); 3380 if (!pBaseType) { 3381 break; 3382 } 3383 Type* pDecl = expectDeclaration(pBaseType, mGlobalArena); 3384 if (!pDecl) { 3385 break; 3386 } 3387 if (! isDefined(pDecl->id)) { 3388 addGlobalSymbol(pDecl); 3389 } 3390 VariableInfo* name = VI(pDecl->id); 3391 if (name && name->pAddress) { 3392 error("Already defined global %s", nameof(pDecl->id)); 3393 } 3394 if (pDecl->tag < TY_FUNC) { 3395 // it's a variable declaration 3396 for(;;) { 3397 if (name && !name->pAddress) { 3398 name->pAddress = (int*) allocGlobalSpace(4); 3399 } 3400 if (accept('=')) { 3401 if (tok == TOK_NUM) { 3402 if (name) { 3403 * (int*) name->pAddress = tokc; 3404 } 3405 next(); 3406 } else { 3407 error("Expected an integer constant"); 3408 } 3409 } 3410 if (!accept(',')) { 3411 break; 3412 } 3413 pDecl = expectDeclaration(pBaseType, mGlobalArena); 3414 if (!pDecl) { 3415 break; 3416 } 3417 if (! isDefined(pDecl->id)) { 3418 addGlobalSymbol(pDecl); 3419 } 3420 name = VI(pDecl->id); 3421 } 3422 skip(';'); 3423 } else { 3424 // Function declaration 3425 if (accept(';')) { 3426 // forward declaration. 3427 } else { 3428 if (name) { 3429 /* patch forward references (XXX: does not work for function 3430 pointers) */ 3431 pGen->gsym((int) name->pForward); 3432 /* put function address */ 3433 name->pAddress = (void*) codeBuf.getPC(); 3434 } 3435 // Calculate stack offsets for parameters 3436 mLocals.pushLevel(); 3437 intptr_t a = 8; 3438 int argCount = 0; 3439 for (Type* pP = pDecl->pTail; pP; pP = pP->pTail) { 3440 Type* pArg = pP->pHead; 3441 addLocalSymbol(pArg); 3442 /* read param name and compute offset */ 3443 VI(pArg->id)->pAddress = (void*) a; 3444 a = a + 4; 3445 argCount++; 3446 } 3447 rsym = loc = 0; 3448 pReturnType = pDecl->pHead; 3449 a = pGen->functionEntry(argCount); 3450 block(0, true); 3451 pGen->gsym(rsym); 3452 pGen->functionExit(argCount, a, loc); 3453 mLocals.popLevel(); 3454 } 3455 } 3456 } 3457 } 3458 3459 char* allocGlobalSpace(int bytes) { 3460 if (glo - pGlobalBase + bytes > ALLOC_SIZE) { 3461 error("Global space exhausted"); 3462 return NULL; 3463 } 3464 char* result = glo; 3465 glo += bytes; 3466 return result; 3467 } 3468 3469 void cleanup() { 3470 if (pGlobalBase != 0) { 3471 free(pGlobalBase); 3472 pGlobalBase = 0; 3473 } 3474 if (pGen) { 3475 delete pGen; 3476 pGen = 0; 3477 } 3478 if (file) { 3479 delete file; 3480 file = 0; 3481 } 3482 } 3483 3484 void clear() { 3485 tok = 0; 3486 tokc = 0; 3487 tokl = 0; 3488 ch = 0; 3489 rsym = 0; 3490 loc = 0; 3491 glo = 0; 3492 dptr = 0; 3493 dch = 0; 3494 file = 0; 3495 pGlobalBase = 0; 3496 pGen = 0; 3497 mPragmaStringCount = 0; 3498 } 3499 3500 void setArchitecture(const char* architecture) { 3501 delete pGen; 3502 pGen = 0; 3503 3504 if (architecture != NULL) { 3505#ifdef PROVIDE_ARM_CODEGEN 3506 if (! pGen && strcmp(architecture, "arm") == 0) { 3507 pGen = new ARMCodeGenerator(); 3508 } 3509#endif 3510#ifdef PROVIDE_X86_CODEGEN 3511 if (! pGen && strcmp(architecture, "x86") == 0) { 3512 pGen = new X86CodeGenerator(); 3513 } 3514#endif 3515 if (!pGen ) { 3516 error("Unknown architecture %s\n", architecture); 3517 } 3518 } 3519 3520 if (pGen == NULL) { 3521#if defined(DEFAULT_ARM_CODEGEN) 3522 pGen = new ARMCodeGenerator(); 3523#elif defined(DEFAULT_X86_CODEGEN) 3524 pGen = new X86CodeGenerator(); 3525#endif 3526 } 3527 if (pGen == NULL) { 3528 error("No code generator defined."); 3529 } else { 3530 pGen->setErrorSink(this); 3531 } 3532 } 3533 3534public: 3535 struct args { 3536 args() { 3537 architecture = 0; 3538 } 3539 const char* architecture; 3540 }; 3541 3542 Compiler() { 3543 clear(); 3544 } 3545 3546 ~Compiler() { 3547 cleanup(); 3548 } 3549 3550 int compile(const char* text, size_t textLength) { 3551 int result; 3552 3553 cleanup(); 3554 clear(); 3555 mTokenTable.setArena(&mGlobalArena); 3556 mGlobals.setArena(&mGlobalArena); 3557 mGlobals.setTokenTable(&mTokenTable); 3558 mLocals.setArena(&mLocalArena); 3559 mLocals.setTokenTable(&mTokenTable); 3560 3561 internKeywords(); 3562 createPrimitiveTypes(); 3563 codeBuf.init(ALLOC_SIZE); 3564 setArchitecture(NULL); 3565 if (!pGen) { 3566 return -1; 3567 } 3568#ifdef PROVIDE_TRACE_CODEGEN 3569 pGen = new TraceCodeGenerator(pGen); 3570#endif 3571 pGen->setErrorSink(this); 3572 pGen->init(&codeBuf); 3573 file = new TextInputStream(text, textLength); 3574 pGlobalBase = (char*) calloc(1, ALLOC_SIZE); 3575 glo = pGlobalBase; 3576 inp(); 3577 next(); 3578 globalDeclarations(); 3579 checkForUndefinedForwardReferences(); 3580 result = pGen->finishCompile(); 3581 if (result == 0) { 3582 if (mErrorBuf.len()) { 3583 result = -2; 3584 } 3585 } 3586 return result; 3587 } 3588 3589 void createPrimitiveTypes() { 3590 mkpInt = createType(TY_INT, NULL, NULL, mGlobalArena); 3591 mkpChar = createType(TY_CHAR, NULL, NULL, mGlobalArena); 3592 mkpVoid = createType(TY_VOID, NULL, NULL, mGlobalArena); 3593 mkpFloat = createType(TY_FLOAT, NULL, NULL, mGlobalArena); 3594 mkpDouble = createType(TY_DOUBLE, NULL, NULL, mGlobalArena); 3595 mkpIntFn = createType(TY_FUNC, mkpInt, NULL, mGlobalArena); 3596 mkpIntPtr = createPtrType(mkpInt, mGlobalArena); 3597 mkpCharPtr = createPtrType(mkpChar, mGlobalArena); 3598 mkpPtrIntFn = createPtrType(mkpIntFn, mGlobalArena); 3599 } 3600 3601 void checkForUndefinedForwardReferences() { 3602 mGlobals.forEach(static_ufrcFn, this); 3603 } 3604 3605 static bool static_ufrcFn(VariableInfo* value, void* context) { 3606 Compiler* pCompiler = (Compiler*) context; 3607 return pCompiler->undefinedForwardReferenceCheck(value); 3608 } 3609 3610 bool undefinedForwardReferenceCheck(VariableInfo* value) { 3611 if (!value->pAddress && value->pForward) { 3612 error("Undefined forward reference: %s", 3613 mTokenTable[value->tok].pText); 3614 } 3615 return true; 3616 } 3617 3618 int dump(FILE* out) { 3619 fwrite(codeBuf.getBase(), 1, codeBuf.getSize(), out); 3620 return 0; 3621 } 3622 3623 int disassemble(FILE* out) { 3624 return pGen->disassemble(out); 3625 } 3626 3627 /* Look through the symbol table to find a symbol. 3628 * If found, return its value. 3629 */ 3630 void* lookup(const char* name) { 3631 tokenid_t tok = mTokenTable.intern(name, strlen(name)); 3632 VariableInfo* pVariableInfo = VI(tok); 3633 if (pVariableInfo) { 3634 return pVariableInfo->pAddress; 3635 } 3636 return NULL; 3637 } 3638 3639 void getPragmas(ACCsizei* actualStringCount, 3640 ACCsizei maxStringCount, ACCchar** strings) { 3641 int stringCount = mPragmaStringCount; 3642 if (actualStringCount) { 3643 *actualStringCount = stringCount; 3644 } 3645 if (stringCount > maxStringCount) { 3646 stringCount = maxStringCount; 3647 } 3648 if (strings) { 3649 char* pPragmas = mPragmas.getUnwrapped(); 3650 while (stringCount-- > 0) { 3651 *strings++ = pPragmas; 3652 pPragmas += strlen(pPragmas) + 1; 3653 } 3654 } 3655 } 3656 3657 char* getErrorMessage() { 3658 return mErrorBuf.getUnwrapped(); 3659 } 3660 3661}; 3662 3663const char* Compiler::operatorChars = 3664 "++--*@/@%@+@-@<<>><=>=<@>@==!=&&||&@^@|@~@!@"; 3665 3666const char Compiler::operatorLevel[] = 3667 {11, 11, 1, 1, 1, 2, 2, 3, 3, 4, 4, 4, 4, 3668 5, 5, /* ==, != */ 3669 9, 10, /* &&, || */ 3670 6, 7, 8, /* & ^ | */ 3671 2, 2 /* ~ ! */ 3672 }; 3673 3674#ifdef PROVIDE_ARM_CODEGEN 3675FILE* Compiler::ARMCodeGenerator::disasmOut; 3676#endif 3677 3678#ifdef PROVIDE_X86_CODEGEN 3679const int Compiler::X86CodeGenerator::operatorHelper[] = { 3680 0x1, // ++ 3681 0xff, // -- 3682 0xc1af0f, // * 3683 0xf9f79991, // / 3684 0xf9f79991, // % (With manual assist to swap results) 3685 0xc801, // + 3686 0xd8f7c829, // - 3687 0xe0d391, // << 3688 0xf8d391, // >> 3689 0xe, // <= 3690 0xd, // >= 3691 0xc, // < 3692 0xf, // > 3693 0x4, // == 3694 0x5, // != 3695 0x0, // && 3696 0x1, // || 3697 0xc821, // & 3698 0xc831, // ^ 3699 0xc809, // | 3700 0xd0f7, // ~ 3701 0x4 // ! 3702}; 3703#endif 3704 3705struct ACCscript { 3706 ACCscript() { 3707 text = 0; 3708 textLength = 0; 3709 accError = ACC_NO_ERROR; 3710 } 3711 3712 ~ACCscript() { 3713 delete text; 3714 } 3715 3716 void setError(ACCenum error) { 3717 if (accError == ACC_NO_ERROR && error != ACC_NO_ERROR) { 3718 accError = error; 3719 } 3720 } 3721 3722 ACCenum getError() { 3723 ACCenum result = accError; 3724 accError = ACC_NO_ERROR; 3725 return result; 3726 } 3727 3728 Compiler compiler; 3729 char* text; 3730 int textLength; 3731 ACCenum accError; 3732}; 3733 3734 3735extern "C" 3736ACCscript* accCreateScript() { 3737 return new ACCscript(); 3738} 3739 3740extern "C" 3741ACCenum accGetError( ACCscript* script ) { 3742 return script->getError(); 3743} 3744 3745extern "C" 3746void accDeleteScript(ACCscript* script) { 3747 delete script; 3748} 3749 3750extern "C" 3751void accScriptSource(ACCscript* script, 3752 ACCsizei count, 3753 const ACCchar ** string, 3754 const ACCint * length) { 3755 int totalLength = 0; 3756 for(int i = 0; i < count; i++) { 3757 int len = -1; 3758 const ACCchar* s = string[i]; 3759 if (length) { 3760 len = length[i]; 3761 } 3762 if (len < 0) { 3763 len = strlen(s); 3764 } 3765 totalLength += len; 3766 } 3767 delete script->text; 3768 char* text = new char[totalLength + 1]; 3769 script->text = text; 3770 script->textLength = totalLength; 3771 char* dest = text; 3772 for(int i = 0; i < count; i++) { 3773 int len = -1; 3774 const ACCchar* s = string[i]; 3775 if (length) { 3776 len = length[i]; 3777 } 3778 if (len < 0) { 3779 len = strlen(s); 3780 } 3781 memcpy(dest, s, len); 3782 dest += len; 3783 } 3784 text[totalLength] = '\0'; 3785} 3786 3787extern "C" 3788void accCompileScript(ACCscript* script) { 3789 int result = script->compiler.compile(script->text, script->textLength); 3790 if (result) { 3791 script->setError(ACC_INVALID_OPERATION); 3792 } 3793} 3794 3795extern "C" 3796void accGetScriptiv(ACCscript* script, 3797 ACCenum pname, 3798 ACCint * params) { 3799 switch (pname) { 3800 case ACC_INFO_LOG_LENGTH: 3801 *params = 0; 3802 break; 3803 } 3804} 3805 3806extern "C" 3807void accGetScriptInfoLog(ACCscript* script, 3808 ACCsizei maxLength, 3809 ACCsizei * length, 3810 ACCchar * infoLog) { 3811 char* message = script->compiler.getErrorMessage(); 3812 int messageLength = strlen(message) + 1; 3813 if (length) { 3814 *length = messageLength; 3815 } 3816 if (infoLog && maxLength > 0) { 3817 int trimmedLength = maxLength < messageLength ? 3818 maxLength : messageLength; 3819 memcpy(infoLog, message, trimmedLength); 3820 infoLog[trimmedLength] = 0; 3821 } 3822} 3823 3824extern "C" 3825void accGetScriptLabel(ACCscript* script, const ACCchar * name, 3826 ACCvoid ** address) { 3827 void* value = script->compiler.lookup(name); 3828 if (value) { 3829 *address = value; 3830 } else { 3831 script->setError(ACC_INVALID_VALUE); 3832 } 3833} 3834 3835extern "C" 3836void accGetPragmas(ACCscript* script, ACCsizei* actualStringCount, 3837 ACCsizei maxStringCount, ACCchar** strings){ 3838 script->compiler.getPragmas(actualStringCount, maxStringCount, strings); 3839} 3840 3841extern "C" 3842void accDisassemble(ACCscript* script) { 3843 script->compiler.disassemble(stderr); 3844} 3845 3846 3847} // namespace acc 3848 3849