// acc.cpp revision 69796b6c847dd15bdbc1d0a563e421562c2c56f3
1/* 2 Obfuscated Tiny C Compiler 3 4 Copyright (C) 2001-2003 Fabrice Bellard 5 6 This software is provided 'as-is', without any express or implied 7 warranty. In no event will the authors be held liable for any damages 8 arising from the use of this software. 9 10 Permission is granted to anyone to use this software for any purpose, 11 including commercial applications, and to alter it and redistribute it 12 freely, subject to the following restrictions: 13 14 1. The origin of this software must not be misrepresented; you must not 15 claim that you wrote the original software. If you use this software 16 in a product, an acknowledgment in the product and its documentation 17 *is* required. 18 2. Altered source versions must be plainly marked as such, and must not be 19 misrepresented as being the original software. 20 3. This notice may not be removed or altered from any source distribution. 21 */ 22 23#include <ctype.h> 24#include <dlfcn.h> 25#include <stdarg.h> 26#include <stdio.h> 27#include <stdlib.h> 28#include <string.h> 29 30#if defined(__arm__) 31#include <unistd.h> 32#endif 33 34#include "disassem.h" 35 36namespace acc { 37 38class compiler { 39 class CodeBuf { 40 char* ind; 41 char* pProgramBase; 42 43 void release() { 44 if (pProgramBase != 0) { 45 free(pProgramBase); 46 pProgramBase = 0; 47 } 48 } 49 50 public: 51 CodeBuf() { 52 pProgramBase = 0; 53 ind = 0; 54 } 55 56 ~CodeBuf() { 57 release(); 58 } 59 60 void init(int size) { 61 release(); 62 pProgramBase = (char*) calloc(1, size); 63 ind = pProgramBase; 64 } 65 66 void o(int n) { 67 /* cannot use unsigned, so we must do a hack */ 68 while (n && n != -1) { 69 *ind++ = n; 70 n = n >> 8; 71 } 72 } 73 74 int o4(int n) { 75 int result = (int) ind; 76 * (int*) ind = n; 77 ind += 4; 78 return result; 79 } 80 81 /* 82 * Output a byte. Handles all values, 0..ff. 
83 */ 84 void ob(int n) { 85 *ind++ = n; 86 } 87 88 /* output a symbol and patch all calls to it */ 89 void gsym(int t) { 90 int n; 91 while (t) { 92 n = *(int *) t; /* next value */ 93 *(int *) t = ((int) ind) - t - 4; 94 t = n; 95 } 96 } 97 98 /* psym is used to put an instruction with a data field which is a 99 reference to a symbol. It is in fact the same as oad ! */ 100 int psym(int n, int t) { 101 return oad(n, t); 102 } 103 104 /* instruction + address */ 105 int oad(int n, int t) { 106 o(n); 107 *(int *) ind = t; 108 t = (int) ind; 109 ind = ind + 4; 110 return t; 111 } 112 113 inline void* getBase() { 114 return (void*) pProgramBase; 115 } 116 117 int getSize() { 118 return ind - pProgramBase; 119 } 120 121 int getPC() { 122 return (int) ind; 123 } 124 }; 125 126 class CodeGenerator { 127 public: 128 CodeGenerator() {} 129 virtual ~CodeGenerator() {} 130 131 virtual void init(CodeBuf* pCodeBuf) { 132 this->pCodeBuf = pCodeBuf; 133 } 134 135 /* returns address to patch with local variable size 136 */ 137 virtual int functionEntry(int argCount) = 0; 138 139 virtual void functionExit(int argCount, int localVariableAddress, int localVariableSize) = 0; 140 141 /* load immediate value */ 142 virtual void li(int t) = 0; 143 144 virtual int gjmp(int t) = 0; 145 146 /* l = 0: je, l == 1: jne */ 147 virtual int gtst(bool l, int t) = 0; 148 149 virtual void gcmp(int op) = 0; 150 151 virtual void genOp(int op) = 0; 152 153 virtual void clearECX() = 0; 154 155 virtual void pushEAX() = 0; 156 157 virtual void popECX() = 0; 158 159 virtual void storeEAXToAddressECX(bool isInt) = 0; 160 161 virtual void loadEAXIndirect(bool isInt) = 0; 162 163 virtual void leaEAX(int ea) = 0; 164 165 virtual void storeEAX(int ea) = 0; 166 167 virtual void loadEAX(int ea) = 0; 168 169 virtual void postIncrementOrDecrement(int n, int op) = 0; 170 171 virtual int beginFunctionCallArguments() = 0; 172 173 virtual void endFunctionCallArguments(int a, int l) = 0; 174 175 virtual void 
storeEAToArg(int l) = 0; 176 177 virtual int callForward(int symbol) = 0; 178 179 virtual void callRelative(int t) = 0; 180 181 virtual void callIndirect(int l) = 0; 182 183 virtual void adjustStackAfterCall(int l) = 0; 184 185 virtual int disassemble(FILE* out) = 0; 186 187 /* output a symbol and patch all calls to it */ 188 virtual void gsym(int t) { 189 pCodeBuf->gsym(t); 190 } 191 192 virtual int finishCompile() { 193#if defined(__arm__) 194 const long base = long(pCodeBuf->getBase()); 195 const long curr = base + long(pCodeBuf->getSize()); 196 int err = cacheflush(base, curr, 0); 197 return err; 198#else 199 return 0; 200#endif 201 } 202 203 /** 204 * Adjust relative branches by this amount. 205 */ 206 virtual int jumpOffset() = 0; 207 208 protected: 209 void o(int n) { 210 pCodeBuf->o(n); 211 } 212 213 /* 214 * Output a byte. Handles all values, 0..ff. 215 */ 216 void ob(int n) { 217 pCodeBuf->ob(n); 218 } 219 220 /* psym is used to put an instruction with a data field which is a 221 reference to a symbol. It is in fact the same as oad ! */ 222 int psym(int n, int t) { 223 return oad(n, t); 224 } 225 226 /* instruction + address */ 227 int oad(int n, int t) { 228 return pCodeBuf->oad(n,t); 229 } 230 231 int getBase() { 232 return (int) pCodeBuf->getBase(); 233 } 234 235 int getPC() { 236 return pCodeBuf->getPC(); 237 } 238 239 int o4(int data) { 240 return pCodeBuf->o4(data); 241 } 242 private: 243 CodeBuf* pCodeBuf; 244 }; 245 246 class ARMCodeGenerator : public CodeGenerator { 247 public: 248 ARMCodeGenerator() {} 249 virtual ~ARMCodeGenerator() {} 250 251 /* returns address to patch with local variable size 252 */ 253 virtual int functionEntry(int argCount) { 254 fprintf(stderr, "functionEntry(%d);\n", argCount); 255 // sp -> arg4 arg5 ... 256 // Push our register-based arguments back on the stack 257 if (argCount > 0) { 258 int regArgCount = argCount <= 4 ? 
argCount : 4; 259 o4(0xE92D0000 | ((1 << argCount) - 1)); // stmfd sp!, {} 260 } 261 // sp -> arg0 arg1 ... 262 o4(0xE92D4800); // stmfd sp!, {fp, lr} 263 // sp, fp -> oldfp, retadr, arg0 arg1 .... 264 o4(0xE1A0B00D); // mov fp, sp 265 return o4(0xE24DD000); // sub sp, sp, # <local variables> 266 } 267 268 virtual void functionExit(int argCount, int localVariableAddress, int localVariableSize) { 269 fprintf(stderr, "functionExit(%d, %d, %d);\n", argCount, localVariableAddress, localVariableSize); 270 // Patch local variable allocation code: 271 if (localVariableSize < 0 || localVariableSize > 255) { 272 error("LocalVariableSize"); 273 } 274 *(char*) (localVariableAddress) = localVariableSize; 275 276 // sp -> locals .... fp -> oldfp, retadr, arg0, arg1, ... 277 o4(0xE1A0E00B); // mov lr, fp 278 o4(0xE59BB000); // ldr fp, [fp] 279 o4(0xE28ED004); // add sp, lr, #4 280 // sp -> retadr, arg0, ... 281 o4(0xE8BD4000); // ldmfd sp!, {lr} 282 // sp -> arg0 .... 283 if (argCount > 0) { 284 // We store the PC into the lr so we can adjust the sp before 285 // returning. (We need to pull off the registers we pushed 286 // earlier. We don't need to actually store them anywhere, 287 // just adjust the stack. 288 int regArgCount = argCount <= 4 ? 
argCount : 4; 289 o4(0xE28DD000 | (regArgCount << 2)); // add sp, sp, #argCount << 2 290 } 291 o4(0xE12FFF1E); // bx lr 292 } 293 294 /* load immediate value */ 295 virtual void li(int t) { 296 fprintf(stderr, "li(%d);\n", t); 297 if (t >= 0 && t < 255) { 298 o4(0xE3A00000 + t); // mov r0, #0 299 } else if (t >= -256 && t < 0) { 300 // mvn means move constant ^ ~0 301 o4(0xE3E00001 - t); // mvn r0, #0 302 } else { 303 o4(0xE51F0000); // ldr r0, .L3 304 o4(0xEA000000); // b .L99 305 o4(t); // .L3: .word 0 306 // .L99: 307 } 308 } 309 310 virtual int gjmp(int t) { 311 fprintf(stderr, "gjmp(%d);\n", t); 312 return o4(0xEA000000 + encodeAddress(t)); // b .L33 313 } 314 315 /* l = 0: je, l == 1: jne */ 316 virtual int gtst(bool l, int t) { 317 fprintf(stderr, "gtst(%d, %d);\n", l, t); 318 error("Unimplemented"); 319 o(0x0fc085); /* test %eax, %eax, je/jne xxx */ 320 return psym(0x84 + l, t); 321 } 322 323 virtual void gcmp(int op) { 324 fprintf(stderr, "gcmp(%d);\n", op); 325 error("Unimplemented"); 326#if 0 327 int t = decodeOp(op); 328 o(0xc139); /* cmp %eax,%ecx */ 329 li(0); 330 o(0x0f); /* setxx %al */ 331 o(t + 0x90); 332 o(0xc0); 333#endif 334 } 335 336 virtual void genOp(int op) { 337 fprintf(stderr, "genOp(%d);\n", op); 338 switch(op) { 339 case OP_MUL: 340 o4(0x0E0000091); // mul r0,r1,r0 341 break; 342 case OP_PLUS: 343 o4(0xE0810000); // add r0,r1,r0 344 break; 345 case OP_MINUS: 346 o4(0xE0410000); // sub r0,r1,r0 347 break; 348 case OP_SHIFT_LEFT: 349 o4(0xE1A00011); // lsl r0,r1,r0 350 break; 351 case OP_SHIFT_RIGHT: 352 o4(0xE1A00051); // asr r0,r1,r0 353 break; 354 case OP_BIT_AND: 355 o4(0xE0010000); // and r0,r1,r0 356 break; 357 case OP_BIT_XOR: 358 o4(0xE0210000); // eor r0,r1,r0 359 break; 360 case OP_BIT_OR: 361 o4(0xE1810000); // orr r0,r1,r0 362 break; 363 case OP_BIT_NOT: 364 o4(0xE1E00000); // mvn r0, r0 365 break; 366 default: 367 error("Unimplemented op %d\n", op); 368 break; 369 } 370#if 0 371 o(decodeOp(op)); 372 if (op == OP_MOD) 373 
o(0x92); /* xchg %edx, %eax */ 374#endif 375 } 376 377 virtual void clearECX() { 378 fprintf(stderr, "clearECX();\n"); 379 o4(0xE3A01000); // mov r1, #0 380 } 381 382 virtual void pushEAX() { 383 fprintf(stderr, "pushEAX();\n"); 384 o4(0xE92D0001); // stmfd sp!,{r0} 385 } 386 387 virtual void popECX() { 388 fprintf(stderr, "popECX();\n"); 389 o4(0xE8BD0002); // ldmfd sp!,{r1} 390 } 391 392 virtual void storeEAXToAddressECX(bool isInt) { 393 fprintf(stderr, "storeEAXToAddressECX(%d);\n", isInt); 394 o4(0x0188 + isInt); /* movl %eax/%al, (%ecx) */ 395 } 396 397 virtual void loadEAXIndirect(bool isInt) { 398 fprintf(stderr, "loadEAXIndirect(%d);\n", isInt); 399 if (isInt) 400 o4(0xE5900000); // ldr r0, [r0] 401 else 402 o4(0xE5D00000); // ldrb r0, [r0] 403 } 404 405 virtual void leaEAX(int ea) { 406 fprintf(stderr, "[!!! fixme !!!] leaEAX(%d);\n", ea); 407 error("Unimplemented"); 408 if (ea < -4095 || ea > 4095) { 409 error("Offset out of range: %08x", ea); 410 } 411 o4(0xE59B0000 | (0x1fff & ea)); //ldr r0, [fp,#ea] 412 } 413 414 virtual void storeEAX(int ea) { 415 fprintf(stderr, "storeEAX(%d);\n", ea); 416 int fpOffset = ea; 417 if (fpOffset < -4095 || fpOffset > 4095) { 418 error("Offset out of range: %08x", ea); 419 } 420 if (fpOffset < 0) { 421 o4(0xE50B0000 | (0xfff & (-fpOffset))); // str r0, [fp,#-ea] 422 } else { 423 o4(0xE58B0000 | (0xfff & fpOffset)); // str r0, [fp,#ea] 424 } 425 } 426 427 virtual void loadEAX(int ea) { 428 fprintf(stderr, "loadEAX(%d);\n", ea); 429 int fpOffset = ea; 430 if (fpOffset < -4095 || fpOffset > 4095) { 431 error("Offset out of range: %08x", ea); 432 } 433 if (fpOffset < 0) { 434 o4(0xE51B0000 | (0xfff & (-fpOffset))); // ldr r0, [fp,#-ea] 435 } else { 436 o4(0xE59B0000 | (0xfff & fpOffset)); //ldr r0, [fp,#ea] 437 } 438 } 439 440 virtual void postIncrementOrDecrement(int n, int op) { 441 fprintf(stderr, "postIncrementOrDecrement(%d, %d);\n", n, op); 442 /* Implement post-increment or post decrement. 
443 */ 444 445 error("Unimplemented"); 446#if 0 447 gmov(0, n); /* 83 ADD */ 448 o(decodeOp(op)); 449#endif 450 } 451 452 virtual int beginFunctionCallArguments() { 453 fprintf(stderr, "beginFunctionCallArguments();\n"); 454 return o4(0xE24DDF00); // Placeholder 455 } 456 457 virtual void endFunctionCallArguments(int a, int l) { 458 fprintf(stderr, "endFunctionCallArguments(0x%08x, %d);\n", a, l); 459 if (l < 0 || l > 0x3FC) { 460 error("L out of range for stack adjustment: 0x%08x", l); 461 } 462 * (int*) a = 0xE24DDF00 | (l >> 2); // sub sp, sp, #0 << 2 463 int argCount = l >> 2; 464 if (argCount > 0) { 465 int regArgCount = argCount > 4 ? 4 : argCount; 466 o4(0xE8BD0000 | ((1 << regArgCount) - 1)); // ldmfd sp!,{} 467 } 468 } 469 470 virtual void storeEAToArg(int l) { 471 fprintf(stderr, "storeEAToArg(%d);\n", l); 472 if (l < 0 || l > 4096-4) { 473 error("l out of range for stack offset: 0x%08x", l); 474 } 475 o4(0xE58D0000 + l); // str r0, [sp, #4] 476 } 477 478 virtual int callForward(int symbol) { 479 fprintf(stderr, "callForward(%d);\n", symbol); 480 // Forward calls are always short (local) 481 return o4(0xEB000000 | encodeAddress(symbol)); 482 } 483 484 virtual void callRelative(int t) { 485 fprintf(stderr, "callRelative(%d);\n", t); 486 int abs = t + getPC() + jumpOffset(); 487 fprintf(stderr, "abs=%d (0x08%x)\n", abs, abs); 488 if (t >= - (1 << 25) && t < (1 << 25)) { 489 o4(0xEB000000 | encodeAddress(t)); 490 } else { 491 // Long call. 
492 o4(0xE59FC000); // ldr r12, .L1 493 o4(0xEA000000); // b .L99 494 o4(t - 16); // .L1: .word 0 495 o4(0xE08CC00F); // .L99: add r12,pc 496 o4(0xE12FFF3C); // blx r12 497 } 498 } 499 500 virtual void callIndirect(int l) { 501 fprintf(stderr, "callIndirect(%d);\n", l); 502 oad(0x2494ff, l); /* call *xxx(%esp) */ 503 } 504 505 virtual void adjustStackAfterCall(int l) { 506 fprintf(stderr, "adjustStackAfterCall(%d);\n", l); 507 if (l < 0 || l > 0x3FC) { 508 error("L out of range for stack adjustment: 0x%08x", l); 509 } 510 int argCount = l >> 2; 511 if (argCount > 4) { 512 int remainingArgs = argCount - 4; 513 o4(0xE28DDF00 | remainingArgs); // add sp, sp, #0x3fc 514 } 515 516 } 517 518 virtual int jumpOffset() { 519 return 4; 520 } 521 522 /* output a symbol and patch all calls to it */ 523 virtual void gsym(int t) { 524 fprintf(stderr, "gsym(0x%x)\n", t); 525 int n; 526 int base = getBase(); 527 int pc = getPC(); 528 fprintf(stderr, "pc = 0x%x\n", pc); 529 while (t) { 530 int data = * (int*) t; 531 int decodedOffset = ((BRANCH_REL_ADDRESS_MASK & data) << 2); 532 if (decodedOffset == 0) { 533 n = 0; 534 } else { 535 n = base + decodedOffset; /* next value */ 536 } 537 *(int *) t = (data & ~BRANCH_REL_ADDRESS_MASK) 538 | encodeRelAddress(pc - t - 8); 539 t = n; 540 } 541 } 542 543 virtual int disassemble(FILE* out) { 544 disasmOut = out; 545 disasm_interface_t di; 546 di.di_readword = disassemble_readword; 547 di.di_printaddr = disassemble_printaddr; 548 di.di_printf = disassemble_printf; 549 550 int base = getBase(); 551 int pc = getPC(); 552 for(int i = base; i < pc; i += 4) { 553 fprintf(out, "%08x: %08x ", i, *(int*) i); 554 ::disasm(&di, i, 0); 555 } 556 return 0; 557 } 558 private: 559 static FILE* disasmOut; 560 561 static u_int 562 disassemble_readword(u_int address) 563 { 564 return(*((u_int *)address)); 565 } 566 567 static void 568 disassemble_printaddr(u_int address) 569 { 570 fprintf(disasmOut, "0x%08x", address); 571 } 572 573 static void 574 
disassemble_printf(const char *fmt, ...) { 575 va_list ap; 576 va_start(ap, fmt); 577 vfprintf(disasmOut, fmt, ap); 578 va_end(ap); 579 } 580 581 static const int BRANCH_REL_ADDRESS_MASK = 0x00ffffff; 582 583 /** Encode a relative address that might also be 584 * a label. 585 */ 586 int encodeAddress(int value) { 587 int base = getBase(); 588 if (value >= base && value <= getPC() ) { 589 // This is a label, encode it relative to the base. 590 value = value - base; 591 } 592 return encodeRelAddress(value); 593 } 594 595 int encodeRelAddress(int value) { 596 return BRANCH_REL_ADDRESS_MASK & (value >> 2); 597 } 598 599 void error(const char* fmt,...) { 600 va_list ap; 601 va_start(ap, fmt); 602 vfprintf(stderr, fmt, ap); 603 va_end(ap); 604 exit(12); 605 } 606 }; 607 608 class X86CodeGenerator : public CodeGenerator { 609 public: 610 X86CodeGenerator() {} 611 virtual ~X86CodeGenerator() {} 612 613 /* returns address to patch with local variable size 614 */ 615 virtual int functionEntry(int argCount) { 616 o(0xe58955); /* push %ebp, mov %esp, %ebp */ 617 return oad(0xec81, 0); /* sub $xxx, %esp */ 618 } 619 620 virtual void functionExit(int argCount, int localVariableAddress, int localVariableSize) { 621 o(0xc3c9); /* leave, ret */ 622 *(int *) localVariableAddress = localVariableSize; /* save local variables */ 623 } 624 625 /* load immediate value */ 626 virtual void li(int t) { 627 oad(0xb8, t); /* mov $xx, %eax */ 628 } 629 630 virtual int gjmp(int t) { 631 return psym(0xe9, t); 632 } 633 634 /* l = 0: je, l == 1: jne */ 635 virtual int gtst(bool l, int t) { 636 o(0x0fc085); /* test %eax, %eax, je/jne xxx */ 637 return psym(0x84 + l, t); 638 } 639 640 virtual void gcmp(int op) { 641 int t = decodeOp(op); 642 o(0xc139); /* cmp %eax,%ecx */ 643 li(0); 644 o(0x0f); /* setxx %al */ 645 o(t + 0x90); 646 o(0xc0); 647 } 648 649 virtual void genOp(int op) { 650 o(decodeOp(op)); 651 if (op == OP_MOD) 652 o(0x92); /* xchg %edx, %eax */ 653 } 654 655 virtual void clearECX() { 
656 oad(0xb9, 0); /* movl $0, %ecx */ 657 } 658 659 virtual void pushEAX() { 660 o(0x50); /* push %eax */ 661 } 662 663 virtual void popECX() { 664 o(0x59); /* pop %ecx */ 665 } 666 667 virtual void storeEAXToAddressECX(bool isInt) { 668 o(0x0188 + isInt); /* movl %eax/%al, (%ecx) */ 669 } 670 671 virtual void loadEAXIndirect(bool isInt) { 672 if (isInt) 673 o(0x8b); /* mov (%eax), %eax */ 674 else 675 o(0xbe0f); /* movsbl (%eax), %eax */ 676 ob(0); /* add zero in code */ 677 } 678 679 virtual void leaEAX(int ea) { 680 gmov(10, ea); /* leal EA, %eax */ 681 } 682 683 virtual void storeEAX(int ea) { 684 gmov(6, ea); /* mov %eax, EA */ 685 } 686 687 virtual void loadEAX(int ea) { 688 gmov(8, ea); /* mov EA, %eax */ 689 } 690 691 virtual void postIncrementOrDecrement(int n, int op) { 692 /* Implement post-increment or post decrement. 693 */ 694 gmov(0, n); /* 83 ADD */ 695 o(decodeOp(op)); 696 } 697 698 virtual int beginFunctionCallArguments() { 699 return oad(0xec81, 0); /* sub $xxx, %esp */ 700 } 701 702 virtual void endFunctionCallArguments(int a, int l) { 703 * (int*) a = l; 704 } 705 706 virtual void storeEAToArg(int l) { 707 oad(0x248489, l); /* movl %eax, xxx(%esp) */ 708 } 709 710 virtual int callForward(int symbol) { 711 return psym(0xe8, symbol); /* call xxx */ 712 } 713 714 virtual void callRelative(int t) { 715 psym(0xe8, t); /* call xxx */ 716 } 717 718 virtual void callIndirect(int l) { 719 oad(0x2494ff, l); /* call *xxx(%esp) */ 720 } 721 722 virtual void adjustStackAfterCall(int l) { 723 oad(0xc481, l); /* add $xxx, %esp */ 724 } 725 726 virtual int jumpOffset() { 727 return 5; 728 } 729 730 virtual int disassemble(FILE* out) { 731 return 1; 732 } 733 734 private: 735 static const int operatorHelper[]; 736 737 int decodeOp(int op) { 738 if (op < 0 || op > OP_COUNT) { 739 fprintf(stderr, "Out-of-range operator: %d\n", op); 740 exit(1); 741 } 742 return operatorHelper[op]; 743 } 744 745 void gmov(int l, int t) { 746 o(l + 0x83); 747 oad((t < LOCAL) << 7 | 
5, t); 748 } 749 }; 750 751 /* vars: value of variables 752 loc : local variable index 753 glo : global variable index 754 ind : output code ptr 755 rsym: return symbol 756 prog: output code 757 dstk: define stack 758 dptr, dch: macro state 759 */ 760 int tok, tokc, tokl, ch, vars, rsym, loc, glo, sym_stk, dstk, 761 dptr, dch, last_id; 762 void* pSymbolBase; 763 void* pGlobalBase; 764 void* pVarsBase; 765 FILE* file; 766 767 CodeBuf codeBuf; 768 CodeGenerator* pGen; 769 770 static const int ALLOC_SIZE = 99999; 771 772 /* depends on the init string */ 773 static const int TOK_STR_SIZE = 48; 774 static const int TOK_IDENT = 0x100; 775 static const int TOK_INT = 0x100; 776 static const int TOK_IF = 0x120; 777 static const int TOK_ELSE = 0x138; 778 static const int TOK_WHILE = 0x160; 779 static const int TOK_BREAK = 0x190; 780 static const int TOK_RETURN = 0x1c0; 781 static const int TOK_FOR = 0x1f8; 782 static const int TOK_DEFINE = 0x218; 783 static const int TOK_MAIN = 0x250; 784 785 static const int TOK_DUMMY = 1; 786 static const int TOK_NUM = 2; 787 788 static const int LOCAL = 0x200; 789 790 static const int SYM_FORWARD = 0; 791 static const int SYM_DEFINE = 1; 792 793 /* tokens in string heap */ 794 static const int TAG_TOK = ' '; 795 static const int TAG_MACRO = 2; 796 797 static const int OP_INCREMENT = 0; 798 static const int OP_DECREMENT = 1; 799 static const int OP_MUL = 2; 800 static const int OP_DIV = 3; 801 static const int OP_MOD = 4; 802 static const int OP_PLUS = 5; 803 static const int OP_MINUS = 6; 804 static const int OP_SHIFT_LEFT = 7; 805 static const int OP_SHIFT_RIGHT = 8; 806 static const int OP_LESS_EQUAL = 9; 807 static const int OP_GREATER_EQUAL = 10; 808 static const int OP_LESS = 11; 809 static const int OP_GREATER = 12; 810 static const int OP_EQUALS = 13; 811 static const int OP_NOT_EQUALS = 14; 812 static const int OP_LOGICAL_AND = 15; 813 static const int OP_LOGICAL_OR = 16; 814 static const int OP_BIT_AND = 17; 815 static const int 
OP_BIT_XOR = 18; 816 static const int OP_BIT_OR = 19; 817 static const int OP_BIT_NOT = 20; 818 static const int OP_LOGICAL_NOT = 21; 819 static const int OP_COUNT = 22; 820 821 /* Operators are searched from front, the two-character operators appear 822 * before the single-character operators with the same first character. 823 * @ is used to pad out single-character operators. 824 */ 825 static const char* operatorChars; 826 static const char operatorLevel[]; 827 828 void pdef(int t) { 829 *(char *) dstk++ = t; 830 } 831 832 void inp() { 833 if (dptr) { 834 ch = *(char *) dptr++; 835 if (ch == TAG_MACRO) { 836 dptr = 0; 837 ch = dch; 838 } 839 } else 840 ch = fgetc(file); 841 /* printf("ch=%c 0x%x\n", ch, ch); */ 842 } 843 844 int isid() { 845 return isalnum(ch) | (ch == '_'); 846 } 847 848 /* read a character constant */ 849 void getq() { 850 if (ch == '\\') { 851 inp(); 852 if (ch == 'n') 853 ch = '\n'; 854 } 855 } 856 857 void next() { 858 int l, a; 859 860 while (isspace(ch) | (ch == '#')) { 861 if (ch == '#') { 862 inp(); 863 next(); 864 if (tok == TOK_DEFINE) { 865 next(); 866 pdef(TAG_TOK); /* fill last ident tag */ 867 *(int *) tok = SYM_DEFINE; 868 *(int *) (tok + 4) = dstk; /* define stack */ 869 } 870 /* well we always save the values ! 
*/ 871 while (ch != '\n') { 872 pdef(ch); 873 inp(); 874 } 875 pdef(ch); 876 pdef(TAG_MACRO); 877 } 878 inp(); 879 } 880 tokl = 0; 881 tok = ch; 882 /* encode identifiers & numbers */ 883 if (isid()) { 884 pdef(TAG_TOK); 885 last_id = dstk; 886 while (isid()) { 887 pdef(ch); 888 inp(); 889 } 890 if (isdigit(tok)) { 891 tokc = strtol((char*) last_id, 0, 0); 892 tok = TOK_NUM; 893 } else { 894 *(char *) dstk = TAG_TOK; /* no need to mark end of string (we 895 suppose data is initialized to zero by calloc) */ 896 tok = (int) (strstr((char*) sym_stk, (char*) (last_id - 1)) 897 - sym_stk); 898 *(char *) dstk = 0; /* mark real end of ident for dlsym() */ 899 tok = tok * 8 + TOK_IDENT; 900 if (tok > TOK_DEFINE) { 901 tok = vars + tok; 902 /* printf("tok=%s %x\n", last_id, tok); */ 903 /* define handling */ 904 if (*(int *) tok == SYM_DEFINE) { 905 dptr = *(int *) (tok + 4); 906 dch = ch; 907 inp(); 908 next(); 909 } 910 } 911 } 912 } else { 913 inp(); 914 if (tok == '\'') { 915 tok = TOK_NUM; 916 getq(); 917 tokc = ch; 918 inp(); 919 inp(); 920 } else if ((tok == '/') & (ch == '*')) { 921 inp(); 922 while (ch) { 923 while (ch != '*') 924 inp(); 925 inp(); 926 if (ch == '/') 927 ch = 0; 928 } 929 inp(); 930 next(); 931 } else { 932 const char* t = operatorChars; 933 int opIndex = 0; 934 while ((l = *t++) != 0) { 935 a = *t++; 936 tokl = operatorLevel[opIndex]; 937 tokc = opIndex; 938 if ((l == tok) & ((a == ch) | (a == '@'))) { 939#if 0 940 printf("%c%c -> tokl=%d tokc=0x%x\n", 941 l, a, tokl, tokc); 942#endif 943 if (a == ch) { 944 inp(); 945 tok = TOK_DUMMY; /* dummy token for double tokens */ 946 } 947 break; 948 } 949 opIndex++; 950 } 951 if (l == 0) { 952 tokl = 0; 953 tokc = 0; 954 } 955 } 956 } 957#if 0 958 { 959 int p; 960 961 printf("tok=0x%x ", tok); 962 if (tok >= TOK_IDENT) { 963 printf("'"); 964 if (tok> TOK_DEFINE) 965 p = sym_stk + 1 + (tok - vars - TOK_IDENT) / 8; 966 else 967 p = sym_stk + 1 + (tok - TOK_IDENT) / 8; 968 while (*(char *)p != TAG_TOK && 
*(char *)p) 969 printf("%c", *(char *)p++); 970 printf("'\n"); 971 } else if (tok == TOK_NUM) { 972 printf("%d\n", tokc); 973 } else { 974 printf("'%c'\n", tok); 975 } 976 } 977#endif 978 } 979 980 void error(const char *fmt, ...) { 981 va_list ap; 982 983 va_start(ap, fmt); 984 fprintf(stderr, "%ld: ", ftell((FILE *) file)); 985 vfprintf(stderr, fmt, ap); 986 fprintf(stderr, "\n"); 987 va_end(ap); 988 exit(1); 989 } 990 991 void skip(int c) { 992 if (tok != c) { 993 error("'%c' expected", c); 994 } 995 next(); 996 } 997 998 /* l is one if '=' parsing wanted (quick hack) */ 999 void unary(int l) { 1000 int n, t, a, c; 1001 t = 0; 1002 n = 1; /* type of expression 0 = forward, 1 = value, other = 1003 lvalue */ 1004 if (tok == '\"') { 1005 pGen->li(glo); 1006 while (ch != '\"') { 1007 getq(); 1008 *(char *) glo++ = ch; 1009 inp(); 1010 } 1011 *(char *) glo = 0; 1012 glo = (glo + 4) & -4; /* align heap */ 1013 inp(); 1014 next(); 1015 } else { 1016 c = tokl; 1017 a = tokc; 1018 t = tok; 1019 next(); 1020 if (t == TOK_NUM) { 1021 pGen->li(a); 1022 } else if (c == 2) { 1023 /* -, +, !, ~ */ 1024 unary(0); 1025 pGen->clearECX(); 1026 if (t == '!') 1027 pGen->gcmp(a); 1028 else 1029 pGen->genOp(a); 1030 } else if (t == '(') { 1031 expr(); 1032 skip(')'); 1033 } else if (t == '*') { 1034 /* parse cast */ 1035 skip('('); 1036 t = tok; /* get type */ 1037 next(); /* skip int/char/void */ 1038 next(); /* skip '*' or '(' */ 1039 if (tok == '*') { 1040 /* function type */ 1041 skip('*'); 1042 skip(')'); 1043 skip('('); 1044 skip(')'); 1045 t = 0; 1046 } 1047 skip(')'); 1048 unary(0); 1049 if (tok == '=') { 1050 next(); 1051 pGen->pushEAX(); 1052 expr(); 1053 pGen->popECX(); 1054 pGen->storeEAXToAddressECX(t == TOK_INT); 1055 } else if (t) { 1056 pGen->loadEAXIndirect(t == TOK_INT); 1057 } 1058 } else if (t == '&') { 1059 pGen->leaEAX(*(int *) tok); 1060 next(); 1061 } else { 1062 n = *(int *) t; 1063 /* forward reference: try dlsym */ 1064 if (!n) { 1065 n = (int) 
dlsym(RTLD_DEFAULT, (char*) last_id); 1066 } 1067 if ((tok == '=') & l) { 1068 /* assignment */ 1069 next(); 1070 expr(); 1071 pGen->storeEAX(n); 1072 } else if (tok != '(') { 1073 /* variable */ 1074 pGen->loadEAX(n); 1075 if (tokl == 11) { 1076 pGen->postIncrementOrDecrement(n, tokc); 1077 next(); 1078 } 1079 } 1080 } 1081 } 1082 1083 /* function call */ 1084 if (tok == '(') { 1085 if (n == 1) 1086 pGen->pushEAX(); 1087 1088 /* push args and invert order */ 1089 a = pGen->beginFunctionCallArguments(); 1090 next(); 1091 l = 0; 1092 while (tok != ')') { 1093 expr(); 1094 pGen->storeEAToArg(l); 1095 if (tok == ',') 1096 next(); 1097 l = l + 4; 1098 } 1099 pGen->endFunctionCallArguments(a, l); 1100 next(); 1101 if (!n) { 1102 /* forward reference */ 1103 t = t + 4; 1104 *(int *) t = pGen->callForward(*(int *) t); 1105 } else if (n == 1) { 1106 pGen->callIndirect(l); 1107 l = l + 4; 1108 } else { 1109 pGen->callRelative(n - codeBuf.getPC() - pGen->jumpOffset()); /* call xxx */ 1110 } 1111 if (l) 1112 pGen->adjustStackAfterCall(l); 1113 } 1114 } 1115 1116 void sum(int l) { 1117 int t, n, a; 1118 t = 0; 1119 if (l-- == 1) 1120 unary(1); 1121 else { 1122 sum(l); 1123 a = 0; 1124 while (l == tokl) { 1125 n = tok; 1126 t = tokc; 1127 next(); 1128 1129 if (l > 8) { 1130 a = pGen->gtst(t == OP_LOGICAL_OR, a); /* && and || output code generation */ 1131 sum(l); 1132 } else { 1133 pGen->pushEAX(); 1134 sum(l); 1135 pGen->popECX(); 1136 1137 if ((l == 4) | (l == 5)) { 1138 pGen->gcmp(t); 1139 } else { 1140 pGen->genOp(t); 1141 } 1142 } 1143 } 1144 /* && and || output code generation */ 1145 if (a && l > 8) { 1146 a = pGen->gtst(t == OP_LOGICAL_OR, a); 1147 pGen->li(t != OP_LOGICAL_OR); 1148 pGen->gjmp(5); /* jmp $ + 5 (sizeof li, FIXME for ARM) */ 1149 pGen->gsym(a); 1150 pGen->li(t == OP_LOGICAL_OR); 1151 } 1152 } 1153 } 1154 1155 void expr() { 1156 sum(11); 1157 } 1158 1159 int test_expr() { 1160 expr(); 1161 return pGen->gtst(0, 0); 1162 } 1163 1164 void block(int l) { 1165 
int a, n, t; 1166 1167 if (tok == TOK_IF) { 1168 next(); 1169 skip('('); 1170 a = test_expr(); 1171 skip(')'); 1172 block(l); 1173 if (tok == TOK_ELSE) { 1174 next(); 1175 n = pGen->gjmp(0); /* jmp */ 1176 pGen->gsym(a); 1177 block(l); 1178 pGen->gsym(n); /* patch else jmp */ 1179 } else { 1180 pGen->gsym(a); /* patch if test */ 1181 } 1182 } else if ((tok == TOK_WHILE) | (tok == TOK_FOR)) { 1183 t = tok; 1184 next(); 1185 skip('('); 1186 if (t == TOK_WHILE) { 1187 n = codeBuf.getPC(); // top of loop, target of "next" iteration 1188 a = test_expr(); 1189 } else { 1190 if (tok != ';') 1191 expr(); 1192 skip(';'); 1193 n = codeBuf.getPC(); 1194 a = 0; 1195 if (tok != ';') 1196 a = test_expr(); 1197 skip(';'); 1198 if (tok != ')') { 1199 t = pGen->gjmp(0); 1200 expr(); 1201 pGen->gjmp(n - codeBuf.getPC() - pGen->jumpOffset()); 1202 pGen->gsym(t); 1203 n = t + 4; 1204 } 1205 } 1206 skip(')'); 1207 block((int) &a); 1208 pGen->gjmp(n - codeBuf.getPC() - pGen->jumpOffset()); /* jmp */ 1209 pGen->gsym(a); 1210 } else if (tok == '{') { 1211 next(); 1212 /* declarations */ 1213 decl(1); 1214 while (tok != '}') 1215 block(l); 1216 next(); 1217 } else { 1218 if (tok == TOK_RETURN) { 1219 next(); 1220 if (tok != ';') 1221 expr(); 1222 rsym = pGen->gjmp(rsym); /* jmp */ 1223 } else if (tok == TOK_BREAK) { 1224 next(); 1225 *(int *) l = pGen->gjmp(*(int *) l); 1226 } else if (tok != ';') 1227 expr(); 1228 skip(';'); 1229 } 1230 } 1231 1232 /* 'l' is true if local declarations */ 1233 void decl(int l) { 1234 int a; 1235 1236 while ((tok == TOK_INT) | ((tok != -1) & (!l))) { 1237 if (tok == TOK_INT) { 1238 next(); 1239 while (tok != ';') { 1240 if (l) { 1241 loc = loc + 4; 1242 *(int *) tok = -loc; 1243 } else { 1244 *(int *) tok = glo; 1245 glo = glo + 4; 1246 } 1247 next(); 1248 if (tok == ',') 1249 next(); 1250 } 1251 skip(';'); 1252 } else { 1253 /* patch forward references (XXX: do not work for function 1254 pointers) */ 1255 pGen->gsym(*(int *) (tok + 4)); 1256 /* put 
function address */ 1257 *(int *) tok = codeBuf.getPC(); 1258 next(); 1259 skip('('); 1260 a = 8; 1261 int argCount = 0; 1262 while (tok != ')') { 1263 /* read param name and compute offset */ 1264 *(int *) tok = a; 1265 a = a + 4; 1266 next(); 1267 if (tok == ',') 1268 next(); 1269 argCount++; 1270 } 1271 next(); /* skip ')' */ 1272 rsym = loc = 0; 1273 a = pGen->functionEntry(argCount); 1274 block(0); 1275 pGen->gsym(rsym); 1276 pGen->functionExit(argCount, a, loc); 1277 } 1278 } 1279 } 1280 1281 void cleanup() { 1282 if (sym_stk != 0) { 1283 free((void*) sym_stk); 1284 sym_stk = 0; 1285 } 1286 if (pGlobalBase != 0) { 1287 free((void*) pGlobalBase); 1288 pGlobalBase = 0; 1289 } 1290 if (pVarsBase != 0) { 1291 free(pVarsBase); 1292 pVarsBase = 0; 1293 } 1294 if (pGen) { 1295 delete pGen; 1296 pGen = 0; 1297 } 1298 } 1299 1300 void clear() { 1301 tok = 0; 1302 tokc = 0; 1303 tokl = 0; 1304 ch = 0; 1305 vars = 0; 1306 rsym = 0; 1307 loc = 0; 1308 glo = 0; 1309 sym_stk = 0; 1310 dstk = 0; 1311 dptr = 0; 1312 dch = 0; 1313 last_id = 0; 1314 file = 0; 1315 pGlobalBase = 0; 1316 pVarsBase = 0; 1317 pGen = 0; 1318 } 1319 1320 void setArchitecture(const char* architecture) { 1321 delete pGen; 1322 pGen = 0; 1323 1324 if (architecture != NULL) { 1325 if (strcmp(architecture, "arm") == 0) { 1326 pGen = new ARMCodeGenerator(); 1327 } else if (strcmp(architecture, "x86") == 0) { 1328 pGen = new X86CodeGenerator(); 1329 } else { 1330 fprintf(stderr, "Unknown architecture %s", architecture); 1331 } 1332 } 1333 1334 if (pGen == NULL) { 1335 pGen = new ARMCodeGenerator(); 1336 } 1337 } 1338 1339public: 1340 struct args { 1341 args() { 1342 architecture = 0; 1343 } 1344 const char* architecture; 1345 }; 1346 1347 compiler() { 1348 clear(); 1349 } 1350 1351 ~compiler() { 1352 cleanup(); 1353 } 1354 1355 int compile(FILE* in, args& args) { 1356 cleanup(); 1357 clear(); 1358 codeBuf.init(ALLOC_SIZE); 1359 setArchitecture(args.architecture); 1360 pGen->init(&codeBuf); 1361 file = in; 
1362 sym_stk = (int) calloc(1, ALLOC_SIZE); 1363 dstk = (int) strcpy((char*) sym_stk, 1364 " int if else while break return for define main ") 1365 + TOK_STR_SIZE; 1366 pGlobalBase = calloc(1, ALLOC_SIZE); 1367 glo = (int) pGlobalBase; 1368 pVarsBase = calloc(1, ALLOC_SIZE); 1369 vars = (int) pVarsBase; 1370 inp(); 1371 next(); 1372 decl(0); 1373 pGen->finishCompile(); 1374 return 0; 1375 } 1376 1377 int run(int argc, char** argv) { 1378 typedef int (*mainPtr)(int argc, char** argv); 1379 mainPtr aMain = (mainPtr) *(int*) (vars + TOK_MAIN); 1380 if (!aMain) { 1381 fprintf(stderr, "Could not find function \"main\".\n"); 1382 return -1; 1383 } 1384 return aMain(argc, argv); 1385 } 1386 1387 int dump(FILE* out) { 1388 fwrite(codeBuf.getBase(), 1, codeBuf.getSize(), out); 1389 return 0; 1390 } 1391 1392 int disassemble(FILE* out) { 1393 return pGen->disassemble(out); 1394 } 1395 1396}; 1397 1398const char* compiler::operatorChars = 1399 "++--*@/@%@+@-@<<>><=>=<@>@==!=&&||&@^@|@~@!@"; 1400 1401const char compiler::operatorLevel[] = 1402 {11, 11, 1, 1, 1, 2, 2, 3, 3, 4, 4, 4, 4, 1403 5, 5, /* ==, != */ 1404 9, 10, /* &&, || */ 1405 6, 7, 8, /* & ^ | */ 1406 2, 2 /* ~ ! */ 1407 }; 1408 1409FILE* compiler::ARMCodeGenerator::disasmOut; 1410 1411const int compiler::X86CodeGenerator::operatorHelper[] = { 1412 0x1, // ++ 1413 0xff, // -- 1414 0xc1af0f, // * 1415 0xf9f79991, // / 1416 0xf9f79991, // % (With manual assist to swap results) 1417 0xc801, // + 1418 0xd8f7c829, // - 1419 0xe0d391, // << 1420 0xf8d391, // >> 1421 0xe, // <= 1422 0xd, // >= 1423 0xc, // < 1424 0xf, // > 1425 0x4, // == 1426 0x5, // != 1427 0x0, // && 1428 0x1, // || 1429 0xc821, // & 1430 0xc831, // ^ 1431 0xc809, // | 1432 0xd0f7, // ~ 1433 0x4 // ! 1434}; 1435 1436} // namespace acc 1437 1438// This is a separate function so it can easily be set by breakpoint in gdb. 
1439int run(acc::compiler& c, int argc, char** argv) { 1440 return c.run(argc, argv); 1441} 1442 1443int main(int argc, char** argv) { 1444 bool doDump = false; 1445 bool doDisassemble = false; 1446 const char* inFile = NULL; 1447 const char* outFile = NULL; 1448 const char* architecture = "arm"; 1449 int i; 1450 for (i = 1; i < argc; i++) { 1451 char* arg = argv[i]; 1452 if (arg[0] == '-') { 1453 switch (arg[1]) { 1454 case 'a': 1455 if (i + 1 >= argc) { 1456 fprintf(stderr, "Expected architecture after -a\n"); 1457 return 2; 1458 } 1459 architecture = argv[i+1]; 1460 i += 1; 1461 break; 1462 case 'd': 1463 if (i + 1 >= argc) { 1464 fprintf(stderr, "Expected filename after -d\n"); 1465 return 2; 1466 } 1467 doDump = true; 1468 outFile = argv[i + 1]; 1469 i += 1; 1470 break; 1471 case 'S': 1472 doDisassemble = true; 1473 break; 1474 default: 1475 fprintf(stderr, "Unrecognized flag %s\n", arg); 1476 return 3; 1477 } 1478 } else if (inFile == NULL) { 1479 inFile = arg; 1480 } else { 1481 break; 1482 } 1483 } 1484 1485 FILE* in = stdin; 1486 if (inFile) { 1487 in = fopen(inFile, "r"); 1488 if (!in) { 1489 fprintf(stderr, "Could not open input file %s\n", inFile); 1490 return 1; 1491 } 1492 } 1493 acc::compiler compiler; 1494 acc::compiler::args args; 1495 args.architecture = architecture; 1496 int compileResult = compiler.compile(in, args); 1497 if (in != stdin) { 1498 fclose(in); 1499 } 1500 if (compileResult) { 1501 fprintf(stderr, "Compile failed: %d\n", compileResult); 1502 return 6; 1503 } 1504 if (doDisassemble) { 1505 compiler.disassemble(stderr); 1506 } 1507 if (doDump) { 1508 FILE* save = fopen(outFile, "w"); 1509 if (!save) { 1510 fprintf(stderr, "Could not open output file %s\n", outFile); 1511 return 5; 1512 } 1513 compiler.dump(save); 1514 fclose(save); 1515 } else { 1516 fprintf(stderr, "Executing compiled code:\n"); 1517 int codeArgc = argc - i + 1; 1518 char** codeArgv = argv + i - 1; 1519 codeArgv[0] = (char*) (inFile ? 
inFile : "stdin"); 1520 int result = run(compiler, codeArgc, codeArgv); 1521 fprintf(stderr, "result: %d\n", result); 1522 return result; 1523 } 1524 1525 return 0; 1526} 1527