1 2/* HOW TO USE 3 413 Dec '05 - Linker no longer used (apart from mymalloc) 5Simply compile and link switchback.c with test_xxx.c, 6e.g. for ppc64: 7$ (cd .. && make EXTRA_CFLAGS="-m64" libvex_ppc64_linux.a) && gcc -m64 -mregnames -Wall -Wshadow -Wno-long-long -Winline -O -g -o switchback switchback.c linker.c ../libvex_ppc64_linux.a test_xxx.c 8 9Test file test_xxx.c must have an entry point called "entry", 10which expects to take a single argument which is a function pointer 11(to "serviceFn"). 12 13Test file may not reference any other symbols. 14 15NOTE: POWERPC: it is critical, when using this on ppc, to set 16CacheLineSize to the right value. Values we currently know of: 17 18 imac (G3): 32 19 G5 (ppc970): 128 20*/ 21 22#include <stdio.h> 23#include <assert.h> 24#include <stdlib.h> 25#include <sys/types.h> 26#include <sys/stat.h> 27#include <unistd.h> 28 29#include "../pub/libvex_basictypes.h" 30#include "../pub/libvex_guest_x86.h" 31#include "../pub/libvex_guest_amd64.h" 32#include "../pub/libvex_guest_ppc32.h" 33#include "../pub/libvex_guest_ppc64.h" 34#include "../pub/libvex.h" 35#include "../pub/libvex_trc_values.h" 36#include "linker.h" 37 38static ULong n_bbs_done = 0; 39static Int n_translations_made = 0; 40 41 42#if defined(__i386__) 43# define VexGuestState VexGuestX86State 44# define LibVEX_Guest_initialise LibVEX_GuestX86_initialise 45# define VexArch VexArchX86 46# define VexSubArch VexSubArchX86_sse1 47# define GuestPC guest_EIP 48# define CacheLineSize 0/*irrelevant*/ 49#elif defined(__x86_64__) 50# define VexGuestState VexGuestAMD64State 51# define LibVEX_Guest_initialise LibVEX_GuestAMD64_initialise 52# define VexArch VexArchAMD64 53# define VexSubArch VexSubArch_NONE 54# define GuestPC guest_RIP 55# define CacheLineSize 0/*irrelevant*/ 56#elif defined(__powerpc__) 57 58#if !defined(__powerpc64__) // ppc32 59# define VexGuestState VexGuestPPC32State 60# define LibVEX_Guest_initialise LibVEX_GuestPPC32_initialise 61# define VexArch VexArchPPC32 62# define VexSubArch VexSubArchPPC32_FI 63# define GuestPC guest_CIA 64# define CacheLineSize 128 65#else 66# define VexGuestState VexGuestPPC64State 67# define LibVEX_Guest_initialise LibVEX_GuestPPC64_initialise 68# define VexArch VexArchPPC64 69# define VexSubArch VexSubArchPPC64_FI 70# define GuestPC guest_CIA 71# define CacheLineSize 128 72#endif 73 74#else 75# error "Unknown arch" 76#endif 77 78/* 7: show conversion into IR */ 79/* 6: show after initial opt */ 80/* 5: show after instrumentation */ 81/* 4: show after second opt */ 82/* 3: show after tree building */ 83/* 2: show selected insns */ 84/* 1: show after reg-alloc */ 85/* 0: show final assembly */ 86#define TEST_FLAGS (1<<7)|(1<<3)|(1<<2)|(1<<1)|(1<<0) 87#define DEBUG_TRACE_FLAGS 0//(1<<7)|(0<<6)|(0<<5)|(0<<4)|(1<<3)|(1<<2)|(1<<1)|(1<<0) 88 89 90/* guest state */ 91UInt gstack[50000]; 92VexGuestState gst; 93VexControl vcon; 94 95/* only used for the switchback transition */ 96/* i386: helper1 = &gst, helper2 = %EFLAGS */ 97/* amd64: helper1 = &gst, helper2 = %EFLAGS */ 98/* ppc32: helper1 = &gst, helper2 = %CR, helper3 = %XER */ 99HWord sb_helper1 = 0; 100HWord sb_helper2 = 0; 101HWord sb_helper3 = 0; 102 103/* translation cache */ 104#define N_TRANS_CACHE 1000000 105#define N_TRANS_TABLE 10000 106 107ULong trans_cache[N_TRANS_CACHE]; 108VexGuestExtents trans_table [N_TRANS_TABLE]; 109ULong* trans_tableP[N_TRANS_TABLE]; 110 111Int trans_cache_used = 0; 112Int trans_table_used = 0; 113 114static Bool chase_into_ok ( Addr64 dst ) { return False; } 115 116#if 0 117// local_sys_write_stderr(&c,1); 118static void local_sys_write_stderr ( HChar* buf, Int n ) 119{ 120 UInt __res; 121 __asm__ volatile ( 122 "li %%r0,4\n\t" /* set %r0 = __NR_write */ 123 "li %%r3,1\n\t" /* set %r3 = stdout */ 124 "mr %%r4,%1\n\t" /* set %r4 = buf */ 125 "mr %%r5,%2\n\t" /* set %r5 = n */ 126 "sc\n\t" /* write(stderr, buf, n) */ 127 "mr %0,%%r3\n" /* set __res = r3 */ 128 : "=mr" (__res) 129 : "g" (buf), "g" (n) 130 : "r0", "r3", "r4", "r5" ); 131} 132#endif 133 134/* For providing services. */ 135static HWord serviceFn ( HWord arg1, HWord arg2 ) 136{ 137 switch (arg1) { 138 case 0: /* EXIT */ 139 printf("---STOP---\n"); 140 printf("serviceFn:EXIT\n"); 141 printf("%llu bbs simulated\n", n_bbs_done); 142 printf("%d translations made, %d tt bytes\n", 143 n_translations_made, 8*trans_cache_used); 144 exit(0); 145 case 1: /* PUTC */ 146 putchar(arg2); 147 return 0; 148 case 2: /* MALLOC */ 149 return (HWord)malloc(arg2); 150 case 3: /* FREE */ 151 free((void*)arg2); 152 return 0; 153 default: 154 assert(0); 155 } 156} 157 158 159/* -------------------- */ 160/* continue execution on the real CPU (never returns) */ 161extern void switchback_asm(void); 162 163#if defined(__i386__) 164 165asm( 166"switchback_asm:\n" 167" movl sb_helper1, %eax\n" // eax = guest state ptr 168" movl 16(%eax), %esp\n" // switch stacks 169" pushl 56(%eax)\n" // push continuation addr 170" movl sb_helper2, %ebx\n" // get eflags 171" pushl %ebx\n" // eflags:CA 172" pushl 0(%eax)\n" // EAX:eflags:CA 173" movl 4(%eax), %ecx\n" 174" movl 8(%eax), %edx\n" 175" movl 12(%eax), %ebx\n" 176" movl 20(%eax), %ebp\n" 177" movl 24(%eax), %esi\n" 178" movl 28(%eax), %edi\n" 179" popl %eax\n" 180" popfl\n" 181" ret\n" 182); 183void switchback ( void ) 184{ 185 sb_helper1 = (HWord)&gst; 186 sb_helper2 = LibVEX_GuestX86_get_eflags(&gst); 187 switchback_asm(); // never returns 188} 189 190#elif defined(__x86_64__) 191 192asm( 193"switchback_asm:\n" 194" movq sb_helper1, %rax\n" // rax = guest state ptr 195" movq 32(%rax), %rsp\n" // switch stacks 196" pushq 168(%rax)\n" // push continuation addr 197" movq sb_helper2, %rbx\n" // get eflags 198" pushq %rbx\n" // eflags:CA 199" pushq 0(%rax)\n" // RAX:eflags:CA 200" movq 8(%rax), %rcx\n" 201" movq 16(%rax), %rdx\n" 202" movq 24(%rax), %rbx\n" 203" movq 40(%rax), %rbp\n" 204" movq 48(%rax), %rsi\n" 205" movq 56(%rax), %rdi\n" 206 207" movq 64(%rax), %r8\n" 208" movq 72(%rax), %r9\n" 209" movq 80(%rax), %r10\n" 210" movq 88(%rax), %r11\n" 211" movq 96(%rax), %r12\n" 212" movq 104(%rax), %r13\n" 213" movq 112(%rax), %r14\n" 214" movq 120(%rax), %r15\n" 215 216" popq %rax\n" 217" popfq\n" 218" ret\n" 219); 220void switchback ( void ) 221{ 222 sb_helper1 = (HWord)&gst; 223 sb_helper2 = LibVEX_GuestAMD64_get_rflags(&gst); 224 switchback_asm(); // never returns 225} 226 227#elif defined(__powerpc__) 228 229static void invalidate_icache(void *ptr, int nbytes) 230{ 231 unsigned long startaddr = (unsigned long) ptr; 232 unsigned long endaddr = startaddr + nbytes; 233 unsigned long addr; 234 unsigned long cls = CacheLineSize; 235 236 startaddr &= ~(cls - 1); 237 for (addr = startaddr; addr < endaddr; addr += cls) 238 asm volatile("dcbst 0,%0" : : "r" (addr)); 239 asm volatile("sync"); 240 for (addr = startaddr; addr < endaddr; addr += cls) 241 asm volatile("icbi 0,%0" : : "r" (addr)); 242 asm volatile("sync; isync"); 243} 244 245 246#if !defined(__powerpc64__) // ppc32 247asm( 248"switchback_asm:\n" 249// gst 250" lis %r31,sb_helper1@ha\n" // get hi-wd of guest_state_ptr addr 251" lwz %r31,sb_helper1@l(%r31)\n" // load word of guest_state_ptr to r31 252 253// LR 254" lwz %r3,900(%r31)\n" // guest_LR 255" mtlr %r3\n" // move to LR 256 257// CR 258" lis %r3,sb_helper2@ha\n" // get hi-wd of flags addr 259" lwz %r3,sb_helper2@l(%r3)\n" // load flags word to r3 260" mtcr %r3\n" // move r3 to CR 261 262// CTR 263" lwz %r3,904(%r31)\n" // guest_CTR 264" mtctr %r3\n" // move r3 to CTR 265 266// XER 267" lis %r3,sb_helper3@ha\n" // get hi-wd of xer addr 268" lwz %r3,sb_helper3@l(%r3)\n" // load xer word to r3 269" mtxer %r3\n" // move r3 to XER 270 271 272// GPR's 273" lwz %r0, 0(%r31)\n" 274" lwz %r1, 4(%r31)\n" // switch stacks (r1 = SP) 275" lwz %r2, 8(%r31)\n" 276" lwz %r3, 12(%r31)\n" 277" lwz %r4, 16(%r31)\n" 278" lwz %r5, 20(%r31)\n" 279" lwz %r6, 24(%r31)\n" 280" lwz %r7, 28(%r31)\n" 281" lwz %r8, 32(%r31)\n" 282" lwz %r9, 36(%r31)\n" 283" lwz %r10, 40(%r31)\n" 284" lwz %r11, 44(%r31)\n" 285" lwz %r12, 48(%r31)\n" 286" lwz %r13, 52(%r31)\n" 287" lwz %r14, 56(%r31)\n" 288" lwz %r15, 60(%r31)\n" 289" lwz %r16, 64(%r31)\n" 290" lwz %r17, 68(%r31)\n" 291" lwz %r18, 72(%r31)\n" 292" lwz %r19, 76(%r31)\n" 293" lwz %r20, 80(%r31)\n" 294" lwz %r21, 84(%r31)\n" 295" lwz %r22, 88(%r31)\n" 296" lwz %r23, 92(%r31)\n" 297" lwz %r24, 96(%r31)\n" 298" lwz %r25, 100(%r31)\n" 299" lwz %r26, 104(%r31)\n" 300" lwz %r27, 108(%r31)\n" 301" lwz %r28, 112(%r31)\n" 302" lwz %r29, 116(%r31)\n" 303" lwz %r30, 120(%r31)\n" 304" lwz %r31, 124(%r31)\n" 305"nop_start_point:\n" 306" nop\n" 307" nop\n" 308" nop\n" 309" nop\n" 310" nop\n" 311"nop_end_point:\n" 312); 313 314#else // ppc64 315 316asm( 317".text\n" 318" .global switchback_asm\n" 319" .section \".opd\",\"aw\"\n" 320" .align 3\n" 321"switchback_asm:\n" 322" .quad .switchback_asm,.TOC.@tocbase,0\n" 323" .previous\n" 324" .type .switchback_asm,@function\n" 325" .global .switchback_asm\n" 326".switchback_asm:\n" 327"switchback_asm_undotted:\n" 328 329// gst: load word of guest_state_ptr to r31 330" lis %r31,sb_helper1@highest\n" 331" ori %r31,%r31,sb_helper1@higher\n" 332" rldicr %r31,%r31,32,31\n" 333" oris %r31,%r31,sb_helper1@h\n" 334" ori %r31,%r31,sb_helper1@l\n" 335" ld %r31,0(%r31)\n" 336 337 338// LR 339" ld %r3,1032(%r31)\n" // guest_LR 340" mtlr %r3\n" // move to LR 341 342// CR 343" lis %r3,sb_helper2@highest\n" 344" ori %r3,%r3,sb_helper2@higher\n" 345" rldicr %r3,%r3,32,31\n" 346" oris %r3,%r3,sb_helper2@h\n" 347" ori %r3,%r3,sb_helper2@l\n" 348" ld %r3,0(%r3)\n" // load flags word to r3 349" mtcr %r3\n" // move r3 to CR 350 351// CTR 352" ld %r3,1040(%r31)\n" // guest_CTR 353" mtctr %r3\n" // move r3 to CTR 354 355// XER 356" lis %r3,sb_helper3@highest\n" 357" ori %r3,%r3,sb_helper3@higher\n" 358" rldicr %r3,%r3,32,31\n" 359" oris %r3,%r3,sb_helper3@h\n" 360" ori %r3,%r3,sb_helper3@l\n" 361" ld %r3,0(%r3)\n" // load xer word to r3 362" mtxer %r3\n" // move r3 to XER 363 364// GPR's 365" ld %r0, 0(%r31)\n" 366" ld %r1, 8(%r31)\n" // switch stacks (r1 = SP) 367" ld %r2, 16(%r31)\n" 368" ld %r3, 24(%r31)\n" 369" ld %r4, 32(%r31)\n" 370" ld %r5, 40(%r31)\n" 371" ld %r6, 48(%r31)\n" 372" ld %r7, 56(%r31)\n" 373" ld %r8, 64(%r31)\n" 374" ld %r9, 72(%r31)\n" 375" ld %r10, 80(%r31)\n" 376" ld %r11, 88(%r31)\n" 377" ld %r12, 96(%r31)\n" 378" ld %r13, 104(%r31)\n" 379" ld %r14, 112(%r31)\n" 380" ld %r15, 120(%r31)\n" 381" ld %r16, 128(%r31)\n" 382" ld %r17, 136(%r31)\n" 383" ld %r18, 144(%r31)\n" 384" ld %r19, 152(%r31)\n" 385" ld %r20, 160(%r31)\n" 386" ld %r21, 168(%r31)\n" 387" ld %r22, 176(%r31)\n" 388" ld %r23, 184(%r31)\n" 389" ld %r24, 192(%r31)\n" 390" ld %r25, 200(%r31)\n" 391" ld %r26, 208(%r31)\n" 392" ld %r27, 216(%r31)\n" 393" ld %r28, 224(%r31)\n" 394" ld %r29, 232(%r31)\n" 395" ld %r30, 240(%r31)\n" 396" ld %r31, 248(%r31)\n" 397"nop_start_point:\n" 398" nop\n" 399" nop\n" 400" nop\n" 401" nop\n" 402" nop\n" 403"nop_end_point:\n" 404); 405#endif 406 407extern void switchback_asm_undotted; 408extern void nop_start_point; 409extern void nop_end_point; 410void switchback ( void ) 411{ 412 Int i; 413 /* blargh. Copy the entire switchback_asm procedure into new 414 memory on which can can set both write and execute permissions, 415 so we can poke around with it and then run the results. */ 416 417#if defined(__powerpc64__) // ppc32 418 UChar* sa_start = (UChar*)&switchback_asm_undotted; 419#else 420 UChar* sa_start = (UChar*)&switchback_asm; 421#endif 422 UChar* sa_nop_start = (UChar*)&nop_start_point; 423 UChar* sa_end = (UChar*)&nop_end_point; 424 425#if 0 426 printf("sa_start %p\n", sa_start ); 427 printf("sa_nop_start %p\n", sa_nop_start); 428 printf("sa_end %p\n", sa_end); 429#endif 430 Int nbytes = sa_end - sa_start; 431 Int off_nopstart = sa_nop_start - sa_start; 432 if (0) 433 printf("nbytes = %d, nopstart = %d\n", nbytes, off_nopstart); 434 435 /* copy it into mallocville */ 436 UChar* copy = mymalloc(nbytes); 437 assert(copy); 438 for (i = 0; i < nbytes; i++) 439 copy[i] = sa_start[i]; 440 441 UInt* p = (UInt*)(©[off_nopstart]); 442 443#if !defined(__powerpc64__) // ppc32 444 Addr32 addr_of_nop = (Addr32)p; 445 Addr32 where_to_go = gst.guest_CIA; 446 Int diff = ((Int)where_to_go) - ((Int)addr_of_nop); 447 448#if 0 449 printf("addr of first nop = 0x%x\n", addr_of_nop); 450 printf("where to go = 0x%x\n", where_to_go); 451 printf("diff = 0x%x\n", diff); 452#endif 453 454#else // ppc64 455 Addr64 addr_of_nop = (Addr64)p; 456 Addr64 where_to_go = gst.guest_CIA; 457 Long diff = ((Long)where_to_go) - ((Long)addr_of_nop); 458 459#if 0 460 printf("addr of first nop = 0x%llx\n", addr_of_nop); 461 printf("where to go = 0x%llx\n", where_to_go); 462 printf("diff = 0x%llx\n", diff); 463#endif 464#endif 465 466 if (diff < -0x2000000 || diff >= 0x2000000) { 467 // we're hosed. Give up 468 printf("hosed -- offset too large\n"); 469 assert(0); 470 } 471 472 sb_helper1 = (HWord)&gst; 473#if !defined(__powerpc64__) // ppc32 474 sb_helper2 = LibVEX_GuestPPC32_get_CR(&gst); 475 sb_helper3 = LibVEX_GuestPPC32_get_XER(&gst); 476#else // ppc64 477 sb_helper2 = LibVEX_GuestPPC64_get_CR(&gst); 478 sb_helper3 = LibVEX_GuestPPC64_get_XER(&gst); 479#endif 480 481 /* stay sane ... */ 482 assert(p[0] == 24<<26); /* nop */ 483 484 /* branch to diff */ 485 p[0] = ((18<<26) | (((diff >> 2) & 0xFFFFFF) << 2) | (0<<1) | (0<<0)); 486 487 invalidate_icache( copy, nbytes ); 488 489#if defined(__powerpc64__) 490 //printf("jumping to %p\n", copy); 491 { ULong faketoc[3]; 492 void* v; 493 faketoc[0] = (ULong)copy; 494 v = &faketoc[0]; 495 ( (void(*)(void)) v )(); 496 } 497#else 498 ( (void(*)(void))copy )(); 499#endif 500} 501 502#else 503# error "Unknown arch (switchback)" 504#endif 505 506/* -------------------- */ 507static HWord f, gp, res; 508extern void run_translation_asm(void); 509 510#if defined(__i386__) 511asm( 512"run_translation_asm:\n" 513" pushal\n" 514" movl gp, %ebp\n" 515" movl f, %eax\n" 516" call *%eax\n" 517" movl %eax, res\n" 518" popal\n" 519" ret\n" 520); 521 522#elif defined(__x86_64__) 523asm( 524"run_translation_asm:\n" 525 526" pushq %rax\n" 527" pushq %rbx\n" 528" pushq %rcx\n" 529" pushq %rdx\n" 530" pushq %rbp\n" 531" pushq %rsi\n" 532" pushq %rdi\n" 533" pushq %r8\n" 534" pushq %r9\n" 535" pushq %r10\n" 536" pushq %r11\n" 537" pushq %r12\n" 538" pushq %r13\n" 539" pushq %r14\n" 540" pushq %r15\n" 541 542" movq gp, %rbp\n" 543" movq f, %rax\n" 544" call *%rax\n" 545" movq %rax, res\n" 546 547" popq %r15\n" 548" popq %r14\n" 549" popq %r13\n" 550" popq %r12\n" 551" popq %r11\n" 552" popq %r10\n" 553" popq %r9\n" 554" popq %r8\n" 555" popq %rdi\n" 556" popq %rsi\n" 557" popq %rbp\n" 558" popq %rdx\n" 559" popq %rcx\n" 560" popq %rbx\n" 561" popq %rax\n" 562 563" ret\n" 564); 565 566#elif defined(__powerpc__) 567 568#if !defined(__powerpc64__) // ppc32 569asm( 570"run_translation_asm:\n" 571 572// create new stack: 573// save old sp at first word & update sp 574" stwu 1,-256(1)\n" 575 576// save LR 577" mflr %r0\n" 578" stw %r0,260(%r1)\n" 579 580// leave hole @ 4(%r1) for a callee to save it's LR 581// no params 582// no need to save non-volatile CR fields 583 584// store registers to stack: just the callee-saved regs 585" stw %r13, 8(%r1)\n" 586" stw %r14, 12(%r1)\n" 587" stw %r15, 16(%r1)\n" 588" stw %r16, 20(%r1)\n" 589" stw %r17, 24(%r1)\n" 590" stw %r18, 28(%r1)\n" 591" stw %r19, 32(%r1)\n" 592" stw %r20, 36(%r1)\n" 593" stw %r21, 40(%r1)\n" 594" stw %r22, 44(%r1)\n" 595" stw %r23, 48(%r1)\n" 596" stw %r24, 52(%r1)\n" 597" stw %r25, 56(%r1)\n" 598" stw %r26, 60(%r1)\n" 599" stw %r27, 64(%r1)\n" 600" stw %r28, 68(%r1)\n" 601" stw %r29, 72(%r1)\n" 602" stw %r30, 76(%r1)\n" 603" stw %r31, 80(%r1)\n" 604 605// r31 (guest state ptr) := global var "gp" 606" lis %r31,gp@ha\n" 607" lwz %r31,gp@l(%r31)\n" 608 609// call translation address in global var "f" 610" lis %r4,f@ha\n" 611" lwz %r4,f@l(%r4)\n" 612" mtctr %r4\n" 613" bctrl\n" 614 615// save return value (in r3) into global var "res" 616" lis %r5,res@ha\n" 617" stw %r3,res@l(%r5)\n" 618 619// save possibly modified guest state ptr (r31) in "gp" 620" lis %r5,gp@ha\n" 621" stw %r31,gp@l(%r5)\n" 622 623// reload registers from stack 624" lwz %r13, 8(%r1)\n" 625" lwz %r14, 12(%r1)\n" 626" lwz %r15, 16(%r1)\n" 627" lwz %r16, 20(%r1)\n" 628" lwz %r17, 24(%r1)\n" 629" lwz %r18, 28(%r1)\n" 630" lwz %r19, 32(%r1)\n" 631" lwz %r20, 36(%r1)\n" 632" lwz %r21, 40(%r1)\n" 633" lwz %r22, 44(%r1)\n" 634" lwz %r23, 48(%r1)\n" 635" lwz %r24, 52(%r1)\n" 636" lwz %r25, 56(%r1)\n" 637" lwz %r26, 60(%r1)\n" 638" lwz %r27, 64(%r1)\n" 639" lwz %r28, 68(%r1)\n" 640" lwz %r29, 72(%r1)\n" 641" lwz %r30, 76(%r1)\n" 642" lwz %r31, 80(%r1)\n" 643 644// restore LR 645" lwz %r0,260(%r1)\n" 646" mtlr %r0\n" 647 648// restore previous stack pointer 649" addi %r1,%r1,256\n" 650 651// return 652" blr" 653); 654 655#else // ppc64 656 657asm( 658".text\n" 659" .global run_translation_asm\n" 660" .section \".opd\",\"aw\"\n" 661" .align 3\n" 662"run_translation_asm:\n" 663" .quad .run_translation_asm,.TOC.@tocbase,0\n" 664" .previous\n" 665" .type .run_translation_asm,@function\n" 666" .global .run_translation_asm\n" 667".run_translation_asm:\n" 668 669// save LR,CTR 670" mflr %r0\n" 671" std %r0,16(%r1)\n" 672" mfctr %r0\n" 673" std %r0,8(%r1)\n" 674 675// create new stack: 676// save old sp at first word & update sp 677" stdu 1,-256(1)\n" 678 679// leave hole @ 4(%r1) for a callee to save it's LR 680// no params 681// no need to save non-volatile CR fields 682 683// store registers to stack: just the callee-saved regs 684" std %r13, 48(%r1)\n" 685" std %r14, 56(%r1)\n" 686" std %r15, 64(%r1)\n" 687" std %r16, 72(%r1)\n" 688" std %r17, 80(%r1)\n" 689" std %r18, 88(%r1)\n" 690" std %r19, 96(%r1)\n" 691" std %r20, 104(%r1)\n" 692" std %r21, 112(%r1)\n" 693" std %r22, 120(%r1)\n" 694" std %r23, 128(%r1)\n" 695" std %r24, 136(%r1)\n" 696" std %r25, 144(%r1)\n" 697" std %r26, 152(%r1)\n" 698" std %r27, 160(%r1)\n" 699" std %r28, 168(%r1)\n" 700" std %r29, 176(%r1)\n" 701" std %r30, 184(%r1)\n" 702" std %r31, 192(%r1)\n" 703 704// r31 (guest state ptr) := global var "gp" 705" lis %r31,gp@highest\n" 706" ori %r31,%r31,gp@higher\n" 707" rldicr %r31,%r31,32,31\n" 708" oris %r31,%r31,gp@h\n" 709" ori %r31,%r31,gp@l\n" 710" ld %r31,0(%r31)\n" 711 712// call translation address in global var "f" 713" lis %r4,f@highest\n" 714" ori %r4,%r4,f@higher\n" 715" rldicr %r4,%r4,32,31\n" 716" oris %r4,%r4,f@h\n" 717" ori %r4,%r4,f@l\n" 718" ld %r4,0(%r4)\n" 719" mtctr %r4\n" 720" bctrl\n" 721 722// save return value (in r3) into global var "res" 723" lis %r5,res@highest\n" 724" ori %r5,%r5,res@higher\n" 725" rldicr %r5,%r5,32,31\n" 726" oris %r5,%r5,res@h\n" 727" ori %r5,%r5,res@l\n" 728" std %r3,0(%r5)\n" 729 730// save possibly modified guest state ptr (r31) in "gp" 731" lis %r5,gp@highest\n" 732" ori %r5,%r5,gp@higher\n" 733" rldicr %r5,%r5,32,31\n" 734" oris %r5,%r5,gp@h\n" 735" ori %r5,%r5,gp@l\n" 736" std %r31,0(%r5)\n" 737 738// reload registers from stack 739" ld %r13, 48(%r1)\n" 740" ld %r14, 56(%r1)\n" 741" ld %r15, 64(%r1)\n" 742" ld %r16, 72(%r1)\n" 743" ld %r17, 80(%r1)\n" 744" ld %r18, 88(%r1)\n" 745" ld %r19, 96(%r1)\n" 746" ld %r20, 104(%r1)\n" 747" ld %r21, 112(%r1)\n" 748" ld %r22, 120(%r1)\n" 749" ld %r23, 128(%r1)\n" 750" ld %r24, 136(%r1)\n" 751" ld %r25, 144(%r1)\n" 752" ld %r26, 152(%r1)\n" 753" ld %r27, 160(%r1)\n" 754" ld %r28, 168(%r1)\n" 755" ld %r29, 176(%r1)\n" 756" ld %r30, 184(%r1)\n" 757" ld %r31, 192(%r1)\n" 758 759// restore previous stack pointer 760" addi %r1,%r1,256\n" 761 762// restore LR,CTR 763" ld %r0,16(%r1)\n" 764" mtlr %r0\n" 765" ld %r0,8(%r1)\n" 766" mtctr %r0\n" 767 768// return 769" blr" 770); 771#endif 772 773#else 774 775# error "Unknown arch" 776#endif 777 778/* Run a translation at host address 'translation'. Return 779 True if Vex asked for an translation cache flush as a result. 780*/ 781Bool run_translation ( HWord translation ) 782{ 783 if (0 && DEBUG_TRACE_FLAGS) { 784 printf(" run translation %p\n", (void*)translation ); 785 printf(" simulated bb: %llu\n", n_bbs_done); 786 } 787 f = translation; 788 gp = (HWord)&gst; 789 run_translation_asm(); 790 gst.GuestPC = res; 791 n_bbs_done ++; 792 return gp==VEX_TRC_JMP_TINVAL; 793} 794 795HWord find_translation ( Addr64 guest_addr ) 796{ 797 Int i; 798 HWord __res; 799 if (0) 800 printf("find translation %p ... ", ULong_to_Ptr(guest_addr)); 801 for (i = 0; i < trans_table_used; i++) 802 if (trans_table[i].base[0] == guest_addr) 803 break; 804 if (i == trans_table_used) { 805 if (0) printf("none\n"); 806 return 0; /* not found */ 807 } 808 809 /* Move this translation one step towards the front, so finding it 810 next time round is just that little bit cheaper. */ 811 if (i > 2) { 812 VexGuestExtents tmpE = trans_table[i-1]; 813 ULong* tmpP = trans_tableP[i-1]; 814 trans_table[i-1] = trans_table[i]; 815 trans_tableP[i-1] = trans_tableP[i]; 816 trans_table[i] = tmpE; 817 trans_tableP[i] = tmpP; 818 i--; 819 } 820 821 __res = (HWord)trans_tableP[i]; 822 if (0) printf("%p\n", (void*)__res); 823 return __res; 824} 825 826#define N_TRANSBUF 5000 827static UChar transbuf[N_TRANSBUF]; 828void make_translation ( Addr64 guest_addr, Bool verbose ) 829{ 830 VexTranslateArgs vta; 831 VexTranslateResult tres; 832 VexArchInfo vex_archinfo; 833 Int trans_used, i, ws_needed; 834 835 if (trans_table_used >= N_TRANS_TABLE 836 || trans_cache_used >= N_TRANS_CACHE-1000) { 837 /* If things are looking to full, just dump 838 all the translations. */ 839 trans_cache_used = 0; 840 trans_table_used = 0; 841 } 842 843 assert(trans_table_used < N_TRANS_TABLE); 844 if (0) 845 printf("make translation %p\n", ULong_to_Ptr(guest_addr)); 846 847 LibVEX_default_VexArchInfo(&vex_archinfo); 848 vex_archinfo.subarch = VexSubArch; 849 vex_archinfo.ppc_cache_line_szB = CacheLineSize; 850 851 /* */ 852 vta.arch_guest = VexArch; 853 vta.archinfo_guest = vex_archinfo; 854 vta.arch_host = VexArch; 855 vta.archinfo_host = vex_archinfo; 856 vta.guest_bytes = (UChar*)ULong_to_Ptr(guest_addr); 857 vta.guest_bytes_addr = (Addr64)guest_addr; 858 vta.guest_bytes_addr_noredir = (Addr64)guest_addr; 859 vta.chase_into_ok = chase_into_ok; 860// vta.guest_extents = &vge; 861 vta.guest_extents = &trans_table[trans_table_used]; 862 vta.host_bytes = transbuf; 863 vta.host_bytes_size = N_TRANSBUF; 864 vta.host_bytes_used = &trans_used; 865 vta.instrument1 = NULL; 866 vta.instrument2 = NULL; 867 vta.do_self_check = False; 868 vta.traceflags = verbose ? TEST_FLAGS : DEBUG_TRACE_FLAGS; 869 vta.dispatch = NULL; 870 871 tres = LibVEX_Translate ( &vta ); 872 873 assert(tres == VexTransOK); 874 ws_needed = (trans_used+7) / 8; 875 assert(ws_needed > 0); 876 assert(trans_cache_used + ws_needed < N_TRANS_CACHE); 877 n_translations_made++; 878 879 for (i = 0; i < trans_used; i++) { 880 HChar* dst = ((HChar*)(&trans_cache[trans_cache_used])) + i; 881 HChar* src = (HChar*)(&transbuf[i]); 882 *dst = *src; 883 } 884 885#if defined(__powerpc__) 886 invalidate_icache( &trans_cache[trans_cache_used], trans_used ); 887#endif 888 889 trans_tableP[trans_table_used] = &trans_cache[trans_cache_used]; 890 trans_table_used++; 891 trans_cache_used += ws_needed; 892} 893 894 895static Bool overlap ( Addr64 start, UInt len, VexGuestExtents* vge ) 896{ 897 Int i; 898 for (i = 0; i < vge->n_used; i++) { 899 if (vge->base[i]+vge->len[i] <= start 900 || vge->base[i] >= start+len) { 901 /* ok */ 902 } else { 903 return True; 904 } 905 } 906 return False; /* no overlap */ 907} 908 909static void dump_translations ( Addr64 start, UInt len ) 910{ 911 Int i, j; 912 j = 0; 913 for (i = 0; i < trans_table_used; i++) { 914 if (overlap(start, len, &trans_table[i])) { 915 /* do nothing */ 916 } else { 917 assert(j <= i); 918 trans_table[j] = trans_table[i]; 919 trans_tableP[j] = trans_tableP[i]; 920 j++; 921 } 922 } 923 assert(j >= 0 && j <= trans_table_used); 924 if (0) printf("dumped %d translations\n", trans_table_used - j); 925 trans_table_used = j; 926} 927 928 929static ULong stopAfter = 0; 930static UChar* entryP = NULL; 931 932 933__attribute__ ((noreturn)) 934static 935void failure_exit ( void ) 936{ 937 fprintf(stdout, "VEX did failure_exit. Bye.\n"); 938 fprintf(stdout, "bb counter = %llu\n\n", n_bbs_done); 939 exit(1); 940} 941 942static 943void log_bytes ( HChar* bytes, Int nbytes ) 944{ 945 fwrite ( bytes, 1, nbytes, stdout ); 946 fflush ( stdout ); 947} 948 949 950/* run simulated code forever (it will exit by calling 951 serviceFn(0)). */ 952static void run_simulator ( void ) 953{ 954 static Addr64 last_guest = 0; 955 Addr64 next_guest; 956 HWord next_host; 957 Bool need_inval; 958 while (1) { 959 next_guest = gst.GuestPC; 960 961 if (0) 962 printf("\nnext_guest: 0x%x\n", (UInt)next_guest); 963 964#if defined(__powerpc64__) 965 if (next_guest == Ptr_to_ULong( (void*)(*(ULong*)(&serviceFn)) )) { 966#else 967 if (next_guest == Ptr_to_ULong(&serviceFn)) { 968#endif 969 /* "do" the function call to serviceFn */ 970# if defined(__i386__) 971 { 972 HWord esp = gst.guest_ESP; 973 gst.guest_EIP = *(UInt*)(esp+0); 974 gst.guest_EAX = serviceFn( *(UInt*)(esp+4), *(UInt*)(esp+8) ); 975 gst.guest_ESP = esp+4; 976 next_guest = gst.guest_EIP; 977 } 978# elif defined(__x86_64__) 979 { 980 HWord esp = gst.guest_RSP; 981 gst.guest_RIP = *(UInt*)(esp+0); 982 gst.guest_RAX = serviceFn( gst.guest_RDI, gst.guest_RSI ); 983 gst.guest_RSP = esp+8; 984 next_guest = gst.guest_RIP; 985 } 986# elif defined(__powerpc__) 987 { 988 gst.guest_GPR3 = serviceFn( gst.guest_GPR3, gst.guest_GPR4 ); 989 gst.guest_CIA = gst.guest_LR; 990 next_guest = gst.guest_CIA; 991 } 992# else 993# error "Unknown arch" 994# endif 995 } 996 997 next_host = find_translation(next_guest); 998 if (next_host == 0) { 999 make_translation(next_guest,False); 1000 next_host = find_translation(next_guest); 1001 assert(next_host != 0); 1002 } 1003 1004 // Switchback 1005 if (n_bbs_done == stopAfter) { 1006 printf("---begin SWITCHBACK at bb:%llu---\n", n_bbs_done); 1007#if 1 1008 if (last_guest) { 1009 printf("\n*** Last run translation (bb:%llu):\n", n_bbs_done-1); 1010 make_translation(last_guest,True); 1011 } 1012#endif 1013#if 0 1014 if (next_guest) { 1015 printf("\n*** Current translation (bb:%llu):\n", n_bbs_done); 1016 make_translation(next_guest,True); 1017 } 1018#endif 1019 printf("--- end SWITCHBACK at bb:%llu ---\n", n_bbs_done); 1020 switchback(); 1021 assert(0); /*NOTREACHED*/ 1022 } 1023 1024 last_guest = next_guest; 1025 need_inval = run_translation(next_host); 1026 if (need_inval) { 1027#if defined(__powerpc__) 1028 dump_translations( (Addr64)gst.guest_TISTART, gst.guest_TILEN ); 1029 if (0) printf("dump translations done\n"); 1030#endif 1031 } 1032 } 1033} 1034 1035 1036static void usage ( void ) 1037{ 1038 printf("usage: switchback #bbs\n"); 1039 printf(" - begins switchback for basic block #bbs\n"); 1040 printf(" - use -1 for largest possible run without switchback\n\n"); 1041 exit(1); 1042} 1043 1044#if defined(__powerpc__) 1045 1046#if !defined(__powerpc64__) // ppc32 1047UInt saved_R2; 1048asm( 1049"get_R2:\n" 1050" lis %r10,saved_R2@ha\n" 1051" stw %r2,saved_R2@l(%r10)\n" 1052" blr\n" 1053); 1054#else // ppc64 1055ULong saved_R2; 1056ULong saved_R13; 1057asm( 1058".text\n" 1059" .global get_R2\n" 1060" .section \".opd\",\"aw\"\n" 1061" .align 3\n" 1062"get_R2:\n" 1063" .quad .get_R2,.TOC.@tocbase,0\n" 1064" .previous\n" 1065" .type .get_R2,@function\n" 1066" .global .get_R2\n" 1067".get_R2:\n" 1068" lis %r10,saved_R2@highest\n" 1069" ori %r10,%r10,saved_R2@higher\n" 1070" rldicr %r10,%r10,32,31\n" 1071" oris %r10,%r10,saved_R2@h\n" 1072" ori %r10,%r10,saved_R2@l\n" 1073" std %r2,0(%r10)\n" 1074" blr\n" 1075); 1076asm( 1077".text\n" 1078" .global get_R13\n" 1079" .section \".opd\",\"aw\"\n" 1080" .align 3\n" 1081"get_R13:\n" 1082" .quad .get_R13,.TOC.@tocbase,0\n" 1083" .previous\n" 1084" .type .get_R13,@function\n" 1085" .global .get_R13\n" 1086".get_R13:\n" 1087" lis %r10,saved_R13@highest\n" 1088" ori %r10,%r10,saved_R13@higher\n" 1089" rldicr %r10,%r10,32,31\n" 1090" oris %r10,%r10,saved_R13@h\n" 1091" ori %r10,%r10,saved_R13@l\n" 1092" std %r13,0(%r10)\n" 1093" blr\n" 1094); 1095#endif 1096extern void get_R2 ( void ); 1097extern void get_R13 ( void ); 1098#endif 1099 1100int main ( Int argc, HChar** argv ) 1101{ 1102 if (argc != 2) 1103 usage(); 1104 1105 stopAfter = (ULong)atoll(argv[1]); 1106 1107 extern void entry ( void*(*service)(int,int) ); 1108 entryP = (UChar*)&entry; 1109 1110 if (!entryP) { 1111 printf("switchback: can't find entry point\n"); 1112 exit(1); 1113 } 1114 1115 LibVEX_default_VexControl(&vcon); 1116 vcon.guest_max_insns=50; 1117 vcon.guest_chase_thresh=0; 1118 vcon.iropt_level=2; 1119 1120 LibVEX_Init( failure_exit, log_bytes, 1, False, &vcon ); 1121 LibVEX_Guest_initialise(&gst); 1122 1123 /* set up as if a call to the entry point passing serviceFn as 1124 the one and only parameter */ 1125# if defined(__i386__) 1126 gst.guest_EIP = (UInt)entryP; 1127 gst.guest_ESP = (UInt)&gstack[25000]; 1128 *(UInt*)(gst.guest_ESP+4) = (UInt)serviceFn; 1129 *(UInt*)(gst.guest_ESP+0) = 0x12345678; 1130# elif defined(__x86_64__) 1131 gst.guest_RIP = (ULong)entryP; 1132 gst.guest_RSP = (ULong)&gstack[25000]; 1133 gst.guest_RDI = (ULong)serviceFn; 1134 *(ULong*)(gst.guest_RSP+0) = 0x12345678AABBCCDDULL; 1135# elif defined(__powerpc__) 1136 get_R2(); 1137 1138#if !defined(__powerpc64__) // ppc32 1139 gst.guest_CIA = (UInt)entryP; 1140 gst.guest_GPR1 = (UInt)&gstack[25000]; /* stack pointer */ 1141 gst.guest_GPR3 = (UInt)serviceFn; /* param to entry */ 1142 gst.guest_GPR2 = saved_R2; 1143 gst.guest_LR = 0x12345678; /* bogus return address */ 1144#else // ppc64 1145 get_R13(); 1146 gst.guest_CIA = * (ULong*)entryP; 1147 gst.guest_GPR1 = (ULong)&gstack[25000]; /* stack pointer */ 1148 gst.guest_GPR3 = (ULong)serviceFn; /* param to entry */ 1149 gst.guest_GPR2 = saved_R2; 1150 gst.guest_GPR13 = saved_R13; 1151 gst.guest_LR = 0x1234567812345678ULL; /* bogus return address */ 1152// printf("setting CIA to %p\n", (void*)gst.guest_CIA); 1153#endif 1154 1155# else 1156# error "Unknown arch" 1157# endif 1158 1159 printf("\n---START---\n"); 1160 1161#if 1 1162 run_simulator(); 1163#else 1164 ( (void(*)(HWord(*)(HWord,HWord))) entryP ) (serviceFn); 1165#endif 1166 1167 1168 return 0; 1169} 1170