1
2/* HOW TO USE
3
413 Dec '05 - Linker no longer used (apart from mymalloc)
5Simply compile and link switchback.c with test_xxx.c,
6e.g. for ppc64:
7$ (cd .. && make EXTRA_CFLAGS="-m64" libvex_ppc64_linux.a) && gcc -m64 -mregnames -Wall -Wshadow -Wno-long-long -Winline -O -g -o switchback switchback.c linker.c ../libvex_ppc64_linux.a test_xxx.c
8
9Test file test_xxx.c must have an entry point called "entry",
10which expects to take a single argument which is a function pointer
11(to "serviceFn").
12
13Test file may not reference any other symbols.
14
15NOTE: POWERPC: it is critical, when using this on ppc, to set
16CacheLineSize to the right value.  Values we currently know of:
17
18   imac (G3):   32
19   G5 (ppc970): 128
20*/
21
22#include <stdio.h>
23#include <assert.h>
24#include <stdlib.h>
25#include <sys/types.h>
26#include <sys/stat.h>
27#include <unistd.h>
28
29#include "../pub/libvex_basictypes.h"
30#include "../pub/libvex_guest_x86.h"
31#include "../pub/libvex_guest_amd64.h"
32#include "../pub/libvex_guest_ppc32.h"
33#include "../pub/libvex_guest_ppc64.h"
34#include "../pub/libvex.h"
35#include "../pub/libvex_trc_values.h"
36#include "linker.h"
37
/* Number of translations executed so far.  Incremented once per call
   to run_translation and compared against stopAfter in run_simulator
   to decide when to switch back to the real CPU. */
static ULong n_bbs_done = 0;
/* Number of translations produced so far; incremented once per
   successful make_translation call. */
static Int   n_translations_made = 0;
40
41
/* Per-architecture configuration, chosen from the compiler's
   predefined target macros.  The rest of the file is written against
   these generic names:
     VexGuestState            guest state struct type
     LibVEX_Guest_initialise  initialiser for that struct
     VexArch / VexSubArch     values placed in the translate args and
                              arch info (used for both guest and host
                              in make_translation)
     GuestPC                  name of the program-counter field
     CacheLineSize            used by invalidate_icache and passed to
                              VEX as ppc_cache_line_szB; only meaningful
                              on PowerPC (see the note at the top of the
                              file about choosing the right value). */
#if defined(__i386__)
#  define VexGuestState             VexGuestX86State
#  define LibVEX_Guest_initialise   LibVEX_GuestX86_initialise
#  define VexArch                   VexArchX86
#  define VexSubArch                VexSubArchX86_sse1
#  define GuestPC                   guest_EIP
#  define CacheLineSize             0/*irrelevant*/
#elif defined(__x86_64__)
#  define VexGuestState             VexGuestAMD64State
#  define LibVEX_Guest_initialise   LibVEX_GuestAMD64_initialise
#  define VexArch                   VexArchAMD64
#  define VexSubArch                VexSubArch_NONE
#  define GuestPC                   guest_RIP
#  define CacheLineSize             0/*irrelevant*/
#elif defined(__powerpc__)

#if !defined(__powerpc64__) // ppc32
#  define VexGuestState             VexGuestPPC32State
#  define LibVEX_Guest_initialise   LibVEX_GuestPPC32_initialise
#  define VexArch                   VexArchPPC32
#  define VexSubArch                VexSubArchPPC32_FI
#  define GuestPC                   guest_CIA
#  define CacheLineSize             128
#else
#  define VexGuestState             VexGuestPPC64State
#  define LibVEX_Guest_initialise   LibVEX_GuestPPC64_initialise
#  define VexArch                   VexArchPPC64
#  define VexSubArch                VexSubArchPPC64_FI
#  define GuestPC                   guest_CIA
#  define CacheLineSize             128
#endif

#else
#   error "Unknown arch"
#endif
77
/* Trace-flag bits passed to VEX through vta.traceflags
   (see make_translation): */
/* 7: show conversion into IR */
/* 6: show after initial opt */
/* 5: show after instrumentation */
/* 4: show after second opt */
/* 3: show after tree building */
/* 2: show selected insns */
/* 1: show after reg-alloc */
/* 0: show final assembly */

/* Flags used for a verbose translation.  The expansion is fully
   parenthesised so that the macro behaves as a single value inside
   any larger expression (e.g. TEST_FLAGS & mask). */
#define TEST_FLAGS ((1<<7)|(1<<3)|(1<<2)|(1<<1)|(1<<0))

/* Flags used for ordinary (non-verbose) translations.  To trace every
   translation, replace 0 with a bit set such as
   ((1<<7)|(0<<6)|(0<<5)|(0<<4)|(1<<3)|(1<<2)|(1<<1)|(1<<0)). */
#define DEBUG_TRACE_FLAGS 0
88
89
/* guest state */
/* Memory used as the simulated program's stack; main() points the
   guest stack pointer at &gstack[25000], i.e. the middle of it. */
UInt gstack[50000];
/* The one and only guest register file, for whichever architecture
   was selected above. */
VexGuestState gst;
/* VEX control settings; filled in by main() before LibVEX_Init. */
VexControl vcon;

/* only used for the switchback transition */
/* These are read by name from the switchback_asm assembly, which is
   why they are plain globals. */
/* i386:  helper1 = &gst, helper2 = %EFLAGS */
/* amd64: helper1 = &gst, helper2 = %EFLAGS */
/* ppc32: helper1 = &gst, helper2 = %CR, helper3 = %XER */
/* (switchback() sets the same three values on ppc64.) */
HWord sb_helper1 = 0;
HWord sb_helper2 = 0;
HWord sb_helper3 = 0;
102
/* translation cache */
/* N_TRANS_CACHE: capacity of trans_cache, in 8-byte (ULong) words.
   N_TRANS_TABLE: maximum number of translations tracked at once. */
#define N_TRANS_CACHE 1000000
#define N_TRANS_TABLE 10000

/* Host code of all live translations, packed back to back; each
   translation starts on a ULong boundary (see make_translation). */
ULong trans_cache[N_TRANS_CACHE];
/* Parallel arrays, one entry per translation: the guest address
   ranges it covers, and a pointer to its host code in trans_cache. */
VexGuestExtents trans_table [N_TRANS_TABLE];
ULong*          trans_tableP[N_TRANS_TABLE];

/* Number of ULong words of trans_cache in use, and number of
   trans_table / trans_tableP entries in use. */
Int trans_cache_used = 0;
Int trans_table_used = 0;
113
114static Bool chase_into_ok ( Addr64 dst ) { return False; }
115
#if 0
/* Disabled (compiled out) PowerPC helper that issues a write system
   call directly via "sc", without going through libc.
   Example call:  local_sys_write_stderr(&c,1);
   NOTE(review): despite the name, the code loads file descriptor 1
   into r3, which is conventionally stdout, not stderr.  The syscall
   result is captured in __res but never returned or used. */
static void local_sys_write_stderr ( HChar* buf, Int n )
{
   UInt __res;
   __asm__ volatile (
      "li %%r0,4\n\t"      /* set %r0 = __NR_write */
      "li %%r3,1\n\t"      /* set %r3 = 1 (file descriptor) */
      "mr %%r4,%1\n\t"     /* set %r4 = buf */
      "mr %%r5,%2\n\t"     /* set %r5 = n */
      "sc\n\t"             /* write(1, buf, n) */
      "mr %0,%%r3\n"       /* set __res = r3 */
      : "=mr" (__res)
      : "g" (buf), "g" (n)
      : "r0", "r3", "r4", "r5" );
}
#endif
133
134/* For providing services. */
135static HWord serviceFn ( HWord arg1, HWord arg2 )
136{
137   switch (arg1) {
138      case 0: /* EXIT */
139         printf("---STOP---\n");
140         printf("serviceFn:EXIT\n");
141	 printf("%llu bbs simulated\n", n_bbs_done);
142	 printf("%d translations made, %d tt bytes\n",
143                n_translations_made, 8*trans_cache_used);
144         exit(0);
145      case 1: /* PUTC */
146         putchar(arg2);
147         return 0;
148      case 2: /* MALLOC */
149         return (HWord)malloc(arg2);
150      case 3: /* FREE */
151         free((void*)arg2);
152         return 0;
153      default:
154         assert(0);
155   }
156}
157
158
/* -------------------- */
/* continue execution on the real CPU (never returns) */
/* Implemented in top-level assembly, one version per architecture.
   It takes its inputs from the sb_helper1..3 globals rather than
   from parameters. */
extern void switchback_asm(void);
162
163#if defined(__i386__)
164
/* switchback_asm, x86 version.
   Loads the real CPU's registers from the guest state whose address
   is in sb_helper1, then "returns" to the guest's continuation
   address, so that execution carries on natively.
   The numeric offsets are hard-coded byte offsets into the guest
   state; presumably they mirror the field layout of VexGuestX86State
   -- verify against libvex_guest_x86.h if that struct changes.
   Sequence: switch %esp to the guest stack, push (continuation
   address, flags from sb_helper2, guest EAX), load the remaining
   integer registers, then pop EAX, pop the flags and ret. */
asm(
"switchback_asm:\n"
"   movl sb_helper1, %eax\n"  // eax = guest state ptr
"   movl  16(%eax), %esp\n"   // switch stacks
"   pushl 56(%eax)\n"         // push continuation addr
"   movl sb_helper2, %ebx\n"  // get eflags
"   pushl %ebx\n"             // eflags:CA
"   pushl 0(%eax)\n"          //  EAX:eflags:CA
"   movl 4(%eax), %ecx\n"
"   movl 8(%eax), %edx\n"
"   movl 12(%eax), %ebx\n"
"   movl 20(%eax), %ebp\n"
"   movl 24(%eax), %esi\n"
"   movl 28(%eax), %edi\n"
"   popl %eax\n"              // restore EAX last: it held the state ptr
"   popfl\n"                  // restore flags
"   ret\n"                    // jump to the continuation addr
);
183void switchback ( void )
184{
185   sb_helper1 = (HWord)&gst;
186   sb_helper2 = LibVEX_GuestX86_get_eflags(&gst);
187   switchback_asm(); // never returns
188}
189
190#elif defined(__x86_64__)
191
/* switchback_asm, amd64 version.
   Loads the real CPU's registers from the guest state whose address
   is in sb_helper1, then "returns" to the guest's continuation
   address, so that execution carries on natively.
   The numeric offsets are hard-coded byte offsets into the guest
   state; presumably they mirror the field layout of
   VexGuestAMD64State -- verify against libvex_guest_amd64.h if that
   struct changes.
   Sequence: switch %rsp to the guest stack, push (continuation
   address, flags from sb_helper2, guest RAX), load the remaining
   integer registers, then pop RAX, pop the flags and ret. */
asm(
"switchback_asm:\n"
"   movq sb_helper1, %rax\n"  // rax = guest state ptr
"   movq  32(%rax), %rsp\n"   // switch stacks
"   pushq 168(%rax)\n"        // push continuation addr
"   movq sb_helper2, %rbx\n"  // get eflags
"   pushq %rbx\n"             // eflags:CA
"   pushq 0(%rax)\n"          // RAX:eflags:CA
"   movq 8(%rax), %rcx\n"
"   movq 16(%rax), %rdx\n"
"   movq 24(%rax), %rbx\n"
"   movq 40(%rax), %rbp\n"
"   movq 48(%rax), %rsi\n"
"   movq 56(%rax), %rdi\n"

"   movq 64(%rax), %r8\n"
"   movq 72(%rax), %r9\n"
"   movq 80(%rax), %r10\n"
"   movq 88(%rax), %r11\n"
"   movq 96(%rax), %r12\n"
"   movq 104(%rax), %r13\n"
"   movq 112(%rax), %r14\n"
"   movq 120(%rax), %r15\n"

"   popq %rax\n"              // restore RAX last: it held the state ptr
"   popfq\n"                  // restore flags
"   ret\n"                    // jump to the continuation addr
);
220void switchback ( void )
221{
222   sb_helper1 = (HWord)&gst;
223   sb_helper2 = LibVEX_GuestAMD64_get_rflags(&gst);
224   switchback_asm(); // never returns
225}
226
227#elif defined(__powerpc__)
228
229static void invalidate_icache(void *ptr, int nbytes)
230{
231   unsigned long startaddr = (unsigned long) ptr;
232   unsigned long endaddr = startaddr + nbytes;
233   unsigned long addr;
234   unsigned long cls = CacheLineSize;
235
236   startaddr &= ~(cls - 1);
237   for (addr = startaddr; addr < endaddr; addr += cls)
238      asm volatile("dcbst 0,%0" : : "r" (addr));
239   asm volatile("sync");
240   for (addr = startaddr; addr < endaddr; addr += cls)
241      asm volatile("icbi 0,%0" : : "r" (addr));
242   asm volatile("sync; isync");
243}
244
245
246#if !defined(__powerpc64__) // ppc32
/* switchback_asm, ppc32 version.
   Restores LR, CR, CTR, XER and all 32 GPRs from the guest state
   whose address is in sb_helper1 (CR and XER come from sb_helper2
   and sb_helper3).  r31 is used as the state pointer throughout and
   is therefore reloaded last.
   The numeric offsets are hard-coded byte offsets into the guest
   state; presumably they mirror the layout of VexGuestPPC32State --
   verify against libvex_guest_ppc32.h if that struct changes.
   The routine deliberately ends in a run of nops with no return:
   switchback() copies everything from switchback_asm up to
   nop_end_point into writable memory and overwrites the first nop
   (at nop_start_point) with a branch to the guest's next
   instruction. */
asm(
"switchback_asm:\n"
// gst
"   lis  %r31,sb_helper1@ha\n"      // get hi-wd of guest_state_ptr addr
"   lwz  %r31,sb_helper1@l(%r31)\n" // load word of guest_state_ptr to r31

// LR
"   lwz  %r3,900(%r31)\n"           // guest_LR
"   mtlr %r3\n"                     // move to LR

// CR
"   lis  %r3,sb_helper2@ha\n"       // get hi-wd of flags addr
"   lwz  %r3,sb_helper2@l(%r3)\n"   // load flags word to r3
"   mtcr %r3\n"                     // move r3 to CR

// CTR
"   lwz %r3,904(%r31)\n"       // guest_CTR
"   mtctr %r3\n"               // move r3 to CTR

// XER
"   lis  %r3,sb_helper3@ha\n"       // get hi-wd of xer addr
"   lwz  %r3,sb_helper3@l(%r3)\n"   // load xer word to r3
"   mtxer %r3\n"                     // move r3 to XER


// GPR's
"   lwz %r0,    0(%r31)\n"
"   lwz %r1,    4(%r31)\n"     // switch stacks (r1 = SP)
"   lwz %r2,    8(%r31)\n"
"   lwz %r3,   12(%r31)\n"
"   lwz %r4,   16(%r31)\n"
"   lwz %r5,   20(%r31)\n"
"   lwz %r6,   24(%r31)\n"
"   lwz %r7,   28(%r31)\n"
"   lwz %r8,   32(%r31)\n"
"   lwz %r9,   36(%r31)\n"
"   lwz %r10,  40(%r31)\n"
"   lwz %r11,  44(%r31)\n"
"   lwz %r12,  48(%r31)\n"
"   lwz %r13,  52(%r31)\n"
"   lwz %r14,  56(%r31)\n"
"   lwz %r15,  60(%r31)\n"
"   lwz %r16,  64(%r31)\n"
"   lwz %r17,  68(%r31)\n"
"   lwz %r18,  72(%r31)\n"
"   lwz %r19,  76(%r31)\n"
"   lwz %r20,  80(%r31)\n"
"   lwz %r21,  84(%r31)\n"
"   lwz %r22,  88(%r31)\n"
"   lwz %r23,  92(%r31)\n"
"   lwz %r24,  96(%r31)\n"
"   lwz %r25, 100(%r31)\n"
"   lwz %r26, 104(%r31)\n"
"   lwz %r27, 108(%r31)\n"
"   lwz %r28, 112(%r31)\n"
"   lwz %r29, 116(%r31)\n"
"   lwz %r30, 120(%r31)\n"
"   lwz %r31, 124(%r31)\n"     // last: r31 was the state pointer
// placeholder instructions; the first is patched into a branch
"nop_start_point:\n"
"   nop\n"
"   nop\n"
"   nop\n"
"   nop\n"
"   nop\n"
"nop_end_point:\n"
);
313
314#else // ppc64
315
/* switchback_asm, ppc64 version.
   The symbol switchback_asm itself is a three-doubleword entry in the
   ".opd" section (code address, TOC base, 0); the code starts at
   .switchback_asm, which is also labelled switchback_asm_undotted so
   that switchback() can take the address of the code from C.
   Restores LR, CR, CTR, XER and all 32 GPRs from the guest state
   whose address is in sb_helper1 (CR and XER come from sb_helper2
   and sb_helper3).  Each global's 64-bit address is built in a
   register with the lis/ori/rldicr/oris/ori sequence.  r31 is used
   as the state pointer throughout and is therefore reloaded last.
   The numeric offsets are hard-coded byte offsets into the guest
   state; presumably they mirror the layout of VexGuestPPC64State --
   verify against libvex_guest_ppc64.h if that struct changes.
   The routine deliberately ends in a run of nops with no return:
   switchback() copies the code up to nop_end_point into writable
   memory and overwrites the first nop (at nop_start_point) with a
   branch to the guest's next instruction. */
asm(
".text\n"
"   .global switchback_asm\n"
"   .section \".opd\",\"aw\"\n"
"   .align 3\n"
"switchback_asm:\n"
"   .quad .switchback_asm,.TOC.@tocbase,0\n"
"   .previous\n"
"   .type .switchback_asm,@function\n"
"   .global  .switchback_asm\n"
".switchback_asm:\n"
"switchback_asm_undotted:\n"

// gst: load word of guest_state_ptr to r31
"   lis    %r31,sb_helper1@highest\n"
"   ori    %r31,%r31,sb_helper1@higher\n"
"   rldicr %r31,%r31,32,31\n"
"   oris   %r31,%r31,sb_helper1@h\n"
"   ori    %r31,%r31,sb_helper1@l\n"
"   ld     %r31,0(%r31)\n"


// LR
"   ld   %r3,1032(%r31)\n"          // guest_LR
"   mtlr %r3\n"                     // move to LR

// CR
"   lis    %r3,sb_helper2@highest\n"
"   ori    %r3,%r3,sb_helper2@higher\n"
"   rldicr %r3,%r3,32,31\n"
"   oris   %r3,%r3,sb_helper2@h\n"
"   ori    %r3,%r3,sb_helper2@l\n"
"   ld     %r3,0(%r3)\n"            // load flags word to r3
"   mtcr   %r3\n"                   // move r3 to CR

// CTR
"   ld     %r3,1040(%r31)\n"        // guest_CTR
"   mtctr  %r3\n"                   // move r3 to CTR

// XER
"   lis    %r3,sb_helper3@highest\n"
"   ori    %r3,%r3,sb_helper3@higher\n"
"   rldicr %r3,%r3,32,31\n"
"   oris   %r3,%r3,sb_helper3@h\n"
"   ori    %r3,%r3,sb_helper3@l\n"
"   ld     %r3,0(%r3)\n"            // load xer word to r3
"   mtxer  %r3\n"                   // move r3 to XER

// GPR's
"   ld %r0,    0(%r31)\n"
"   ld %r1,    8(%r31)\n"     // switch stacks (r1 = SP)
"   ld %r2,   16(%r31)\n"
"   ld %r3,   24(%r31)\n"
"   ld %r4,   32(%r31)\n"
"   ld %r5,   40(%r31)\n"
"   ld %r6,   48(%r31)\n"
"   ld %r7,   56(%r31)\n"
"   ld %r8,   64(%r31)\n"
"   ld %r9,   72(%r31)\n"
"   ld %r10,  80(%r31)\n"
"   ld %r11,  88(%r31)\n"
"   ld %r12,  96(%r31)\n"
"   ld %r13, 104(%r31)\n"
"   ld %r14, 112(%r31)\n"
"   ld %r15, 120(%r31)\n"
"   ld %r16, 128(%r31)\n"
"   ld %r17, 136(%r31)\n"
"   ld %r18, 144(%r31)\n"
"   ld %r19, 152(%r31)\n"
"   ld %r20, 160(%r31)\n"
"   ld %r21, 168(%r31)\n"
"   ld %r22, 176(%r31)\n"
"   ld %r23, 184(%r31)\n"
"   ld %r24, 192(%r31)\n"
"   ld %r25, 200(%r31)\n"
"   ld %r26, 208(%r31)\n"
"   ld %r27, 216(%r31)\n"
"   ld %r28, 224(%r31)\n"
"   ld %r29, 232(%r31)\n"
"   ld %r30, 240(%r31)\n"
"   ld %r31, 248(%r31)\n"     // last: r31 was the state pointer
// placeholder instructions; the first is patched into a branch
"nop_start_point:\n"
"   nop\n"
"   nop\n"
"   nop\n"
"   nop\n"
"   nop\n"
"nop_end_point:\n"
);
405#endif
406
/* Labels defined inside the switchback_asm assembly; only their
   addresses are ever used.  switchback_asm_undotted is defined (and
   referenced) on ppc64 only. */
extern void switchback_asm_undotted;
extern void nop_start_point;
extern void nop_end_point;

/* PowerPC (32- and 64-bit): resume the guest on the real CPU.
   The register-restoring stub ends in nops rather than a jump.  This
   function makes a private copy of the stub, patches the first nop
   into a relative branch to the guest's current instruction address
   (gst.guest_CIA), flushes the caches for the copy, and jumps to it.
   It does not return.  If the guest address is too far from the copy
   to be reached by the branch, it prints a message and aborts. */
void switchback ( void )
{
   Int i;
   /* blargh.  Copy the entire switchback_asm procedure into new
      memory on which we can set both write and execute permissions,
      so we can poke around with it and then run the results. */

#if defined(__powerpc64__) // ppc64
   /* on ppc64 'switchback_asm' names the function descriptor, so use
      the label placed directly on the code */
   UChar* sa_start     = (UChar*)&switchback_asm_undotted;
#else
   UChar* sa_start     = (UChar*)&switchback_asm;
#endif
   UChar* sa_nop_start = (UChar*)&nop_start_point;
   UChar* sa_end       = (UChar*)&nop_end_point;

#if 0
   printf("sa_start     %p\n", sa_start );
   printf("sa_nop_start %p\n", sa_nop_start);
   printf("sa_end       %p\n", sa_end);
#endif
   /* size of the stub, and offset of its first nop, in bytes */
   Int nbytes       = sa_end - sa_start;
   Int off_nopstart = sa_nop_start - sa_start;
   if (0)
      printf("nbytes = %d, nopstart = %d\n", nbytes, off_nopstart);

   /* copy it into mallocville */
   UChar* copy = mymalloc(nbytes);
   assert(copy);
   for (i = 0; i < nbytes; i++)
      copy[i] = sa_start[i];

   /* p addresses the first nop inside the copy */
   UInt* p = (UInt*)(&copy[off_nopstart]);

   /* diff = distance in bytes from that nop to the guest's next
      instruction */
#if !defined(__powerpc64__) // ppc32
   Addr32 addr_of_nop = (Addr32)p;
   Addr32 where_to_go = gst.guest_CIA;
   Int    diff = ((Int)where_to_go) - ((Int)addr_of_nop);

#if 0
   printf("addr of first nop = 0x%x\n", addr_of_nop);
   printf("where to go       = 0x%x\n", where_to_go);
   printf("diff = 0x%x\n", diff);
#endif

#else // ppc64
   Addr64 addr_of_nop = (Addr64)p;
   Addr64 where_to_go = gst.guest_CIA;
   Long   diff = ((Long)where_to_go) - ((Long)addr_of_nop);

#if 0
   printf("addr of first nop = 0x%llx\n", addr_of_nop);
   printf("where to go       = 0x%llx\n", where_to_go);
   printf("diff = 0x%llx\n", diff);
#endif
#endif

   /* the branch built below has room for a 24-bit word displacement
      only, i.e. a signed 26-bit byte offset */
   if (diff < -0x2000000 || diff >= 0x2000000) {
     // we're hosed.  Give up
     printf("hosed -- offset too large\n");
     assert(0);
   }

   /* inputs read by the stub */
   sb_helper1 = (HWord)&gst;
#if !defined(__powerpc64__) // ppc32
   sb_helper2 = LibVEX_GuestPPC32_get_CR(&gst);
   sb_helper3 = LibVEX_GuestPPC32_get_XER(&gst);
#else // ppc64
   sb_helper2 = LibVEX_GuestPPC64_get_CR(&gst);
   sb_helper3 = LibVEX_GuestPPC64_get_XER(&gst);
#endif

   /* stay sane ... */
   assert(p[0] == 24<<26); /* nop */

   /* branch to diff */
   /* layout: opcode field 18 in the top 6 bits, then the 24-bit word
      displacement, then two zero low bits (presumably the AA and LK
      bits of a plain relative branch -- confirm against the ISA) */
   p[0] = ((18<<26) | (((diff >> 2) & 0xFFFFFF) << 2) | (0<<1) | (0<<0));

   invalidate_icache( copy, nbytes );

#if defined(__powerpc64__)
   //printf("jumping to %p\n", copy);
   /* A ppc64 function pointer refers to a descriptor, not to code, so
      build one whose first doubleword is the address of the copy.
      The other two entries are left unset. */
   { ULong faketoc[3];
     void* v;
     faketoc[0] = (ULong)copy;
     v = &faketoc[0];
     ( (void(*)(void)) v )();
   }
#else
   ( (void(*)(void))copy )();
#endif
}
501
502#else
503#   error "Unknown arch (switchback)"
504#endif
505
506/* -------------------- */
/* Mailbox between run_translation() and run_translation_asm(), which
   takes no parameters:
     f    in:  host address of the translation to call
     gp   in:  guest state pointer; the PowerPC stubs also write the
               pointer register back here after the call
     res  out: value the translation returned */
static HWord f, gp, res;
/* Per-architecture assembly stub that calls the translation at 'f'. */
extern void run_translation_asm(void);
509
510#if defined(__i386__)
/* run_translation_asm, x86 version.
   Saves all general registers, points %ebp at the guest state (gp),
   calls the translation whose address is in f, stores the value it
   returns in %eax into res, restores the registers and returns.
   Note that this version does not write %ebp back to gp. */
asm(
"run_translation_asm:\n"
"   pushal\n"
"   movl gp, %ebp\n"
"   movl f, %eax\n"
"   call *%eax\n"
"   movl %eax, res\n"
"   popal\n"
"   ret\n"
);
521
522#elif defined(__x86_64__)
/* run_translation_asm, amd64 version.
   Saves the integer registers (all except %rsp), points %rbp at the
   guest state (gp), calls the translation whose address is in f,
   stores the value it returns in %rax into res, restores the
   registers in reverse order and returns.
   Note that this version does not write %rbp back to gp. */
asm(
"run_translation_asm:\n"

"   pushq %rax\n"
"   pushq %rbx\n"
"   pushq %rcx\n"
"   pushq %rdx\n"
"   pushq %rbp\n"
"   pushq %rsi\n"
"   pushq %rdi\n"
"   pushq %r8\n"
"   pushq %r9\n"
"   pushq %r10\n"
"   pushq %r11\n"
"   pushq %r12\n"
"   pushq %r13\n"
"   pushq %r14\n"
"   pushq %r15\n"

"   movq gp, %rbp\n"
"   movq f, %rax\n"
"   call *%rax\n"
"   movq %rax, res\n"

"   popq  %r15\n"
"   popq  %r14\n"
"   popq  %r13\n"
"   popq  %r12\n"
"   popq  %r11\n"
"   popq  %r10\n"
"   popq  %r9\n"
"   popq  %r8\n"
"   popq  %rdi\n"
"   popq  %rsi\n"
"   popq  %rbp\n"
"   popq  %rdx\n"
"   popq  %rcx\n"
"   popq  %rbx\n"
"   popq  %rax\n"

"   ret\n"
);
565
566#elif defined(__powerpc__)
567
568#if !defined(__powerpc64__) // ppc32
/* run_translation_asm, ppc32 version.
   Opens a 256-byte stack frame, saves LR and r13..r31, loads r31
   with the guest state pointer (gp), calls the translation whose
   address is in f via CTR, then stores r3 into res and r31 back into
   gp before restoring everything and returning. */
asm(
"run_translation_asm:\n"

// create new stack:
// save old sp at first word & update sp
"   stwu 1,-256(1)\n"

// save LR
// (260 = 256+4, i.e. the word at offset 4 from the previous sp)
"   mflr %r0\n"
"   stw  %r0,260(%r1)\n"

// leave hole @ 4(%r1) for a callee to save its LR
// no params
// no need to save non-volatile CR fields

// store registers to stack: just the callee-saved regs
"   stw %r13,  8(%r1)\n"
"   stw %r14, 12(%r1)\n"
"   stw %r15, 16(%r1)\n"
"   stw %r16, 20(%r1)\n"
"   stw %r17, 24(%r1)\n"
"   stw %r18, 28(%r1)\n"
"   stw %r19, 32(%r1)\n"
"   stw %r20, 36(%r1)\n"
"   stw %r21, 40(%r1)\n"
"   stw %r22, 44(%r1)\n"
"   stw %r23, 48(%r1)\n"
"   stw %r24, 52(%r1)\n"
"   stw %r25, 56(%r1)\n"
"   stw %r26, 60(%r1)\n"
"   stw %r27, 64(%r1)\n"
"   stw %r28, 68(%r1)\n"
"   stw %r29, 72(%r1)\n"
"   stw %r30, 76(%r1)\n"
"   stw %r31, 80(%r1)\n"

// r31 (guest state ptr) := global var "gp"
"   lis %r31,gp@ha\n"
"   lwz %r31,gp@l(%r31)\n"

// call translation address in global var "f"
"   lis %r4,f@ha\n"
"   lwz %r4,f@l(%r4)\n"
"   mtctr %r4\n"
"   bctrl\n"

// save return value (in r3) into global var "res"
"   lis %r5,res@ha\n"
"   stw %r3,res@l(%r5)\n"

// save possibly modified guest state ptr (r31) in "gp"
// (run_translation compares gp against VEX_TRC_JMP_TINVAL afterwards)
"   lis %r5,gp@ha\n"
"   stw %r31,gp@l(%r5)\n"

// reload registers from stack
"   lwz %r13,  8(%r1)\n"
"   lwz %r14, 12(%r1)\n"
"   lwz %r15, 16(%r1)\n"
"   lwz %r16, 20(%r1)\n"
"   lwz %r17, 24(%r1)\n"
"   lwz %r18, 28(%r1)\n"
"   lwz %r19, 32(%r1)\n"
"   lwz %r20, 36(%r1)\n"
"   lwz %r21, 40(%r1)\n"
"   lwz %r22, 44(%r1)\n"
"   lwz %r23, 48(%r1)\n"
"   lwz %r24, 52(%r1)\n"
"   lwz %r25, 56(%r1)\n"
"   lwz %r26, 60(%r1)\n"
"   lwz %r27, 64(%r1)\n"
"   lwz %r28, 68(%r1)\n"
"   lwz %r29, 72(%r1)\n"
"   lwz %r30, 76(%r1)\n"
"   lwz %r31, 80(%r1)\n"

// restore LR
"   lwz  %r0,260(%r1)\n"
"   mtlr %r0\n"

// restore previous stack pointer
"   addi %r1,%r1,256\n"

// return
"   blr"
);
654
655#else // ppc64
656
/* run_translation_asm, ppc64 version.
   The symbol run_translation_asm is a three-doubleword entry in the
   ".opd" section (code address, TOC base, 0); the code starts at
   .run_translation_asm.
   Saves LR and CTR just above the incoming stack pointer, opens a
   256-byte frame, saves r13..r31, loads r31 with the guest state
   pointer (gp), calls the translation whose address is in f via CTR,
   then stores r3 into res and r31 back into gp before restoring
   everything and returning.  The 64-bit address of each global is
   built with the lis/ori/rldicr/oris/ori sequence. */
asm(
".text\n"
"   .global run_translation_asm\n"
"   .section \".opd\",\"aw\"\n"
"   .align 3\n"
"run_translation_asm:\n"
"   .quad .run_translation_asm,.TOC.@tocbase,0\n"
"   .previous\n"
"   .type .run_translation_asm,@function\n"
"   .global  .run_translation_asm\n"
".run_translation_asm:\n"

// save LR,CTR
// (at offsets 16 and 8 from the caller's stack pointer, before the
// new frame is created)
"   mflr  %r0\n"
"   std   %r0,16(%r1)\n"
"   mfctr %r0\n"
"   std   %r0,8(%r1)\n"

// create new stack:
// save old sp at first word & update sp
"   stdu 1,-256(1)\n"

// NOTE(review): the next remark about a hole at 4(%r1) appears to be
// carried over from the 32-bit version; here the register save area
// starts at 48(%r1).
// leave hole @ 4(%r1) for a callee to save its LR
// no params
// no need to save non-volatile CR fields

// store registers to stack: just the callee-saved regs
"   std %r13,  48(%r1)\n"
"   std %r14,  56(%r1)\n"
"   std %r15,  64(%r1)\n"
"   std %r16,  72(%r1)\n"
"   std %r17,  80(%r1)\n"
"   std %r18,  88(%r1)\n"
"   std %r19,  96(%r1)\n"
"   std %r20, 104(%r1)\n"
"   std %r21, 112(%r1)\n"
"   std %r22, 120(%r1)\n"
"   std %r23, 128(%r1)\n"
"   std %r24, 136(%r1)\n"
"   std %r25, 144(%r1)\n"
"   std %r26, 152(%r1)\n"
"   std %r27, 160(%r1)\n"
"   std %r28, 168(%r1)\n"
"   std %r29, 176(%r1)\n"
"   std %r30, 184(%r1)\n"
"   std %r31, 192(%r1)\n"

// r31 (guest state ptr) := global var "gp"
"   lis    %r31,gp@highest\n"
"   ori    %r31,%r31,gp@higher\n"
"   rldicr %r31,%r31,32,31\n"
"   oris   %r31,%r31,gp@h\n"
"   ori    %r31,%r31,gp@l\n"
"   ld     %r31,0(%r31)\n"

// call translation address in global var "f"
"   lis    %r4,f@highest\n"
"   ori    %r4,%r4,f@higher\n"
"   rldicr %r4,%r4,32,31\n"
"   oris   %r4,%r4,f@h\n"
"   ori    %r4,%r4,f@l\n"
"   ld     %r4,0(%r4)\n"
"   mtctr  %r4\n"
"   bctrl\n"

// save return value (in r3) into global var "res"
"   lis    %r5,res@highest\n"
"   ori    %r5,%r5,res@higher\n"
"   rldicr %r5,%r5,32,31\n"
"   oris   %r5,%r5,res@h\n"
"   ori    %r5,%r5,res@l\n"
"   std    %r3,0(%r5)\n"

// save possibly modified guest state ptr (r31) in "gp"
// (run_translation compares gp against VEX_TRC_JMP_TINVAL afterwards)
"   lis    %r5,gp@highest\n"
"   ori    %r5,%r5,gp@higher\n"
"   rldicr %r5,%r5,32,31\n"
"   oris   %r5,%r5,gp@h\n"
"   ori    %r5,%r5,gp@l\n"
"   std    %r31,0(%r5)\n"

// reload registers from stack
"   ld %r13,  48(%r1)\n"
"   ld %r14,  56(%r1)\n"
"   ld %r15,  64(%r1)\n"
"   ld %r16,  72(%r1)\n"
"   ld %r17,  80(%r1)\n"
"   ld %r18,  88(%r1)\n"
"   ld %r19,  96(%r1)\n"
"   ld %r20, 104(%r1)\n"
"   ld %r21, 112(%r1)\n"
"   ld %r22, 120(%r1)\n"
"   ld %r23, 128(%r1)\n"
"   ld %r24, 136(%r1)\n"
"   ld %r25, 144(%r1)\n"
"   ld %r26, 152(%r1)\n"
"   ld %r27, 160(%r1)\n"
"   ld %r28, 168(%r1)\n"
"   ld %r29, 176(%r1)\n"
"   ld %r30, 184(%r1)\n"
"   ld %r31, 192(%r1)\n"

// restore previous stack pointer
"   addi %r1,%r1,256\n"

// restore LR,CTR
"   ld    %r0,16(%r1)\n"
"   mtlr  %r0\n"
"   ld    %r0,8(%r1)\n"
"   mtctr %r0\n"

// return
"   blr"
);
771#endif
772
773#else
774
775#   error "Unknown arch"
776#endif
777
778/* Run a translation at host address 'translation'.  Return
779   True if Vex asked for an translation cache flush as a result.
780*/
781Bool run_translation ( HWord translation )
782{
783   if (0 && DEBUG_TRACE_FLAGS) {
784      printf(" run translation %p\n", (void*)translation );
785      printf(" simulated bb: %llu\n", n_bbs_done);
786   }
787   f = translation;
788   gp = (HWord)&gst;
789   run_translation_asm();
790   gst.GuestPC = res;
791   n_bbs_done ++;
792   return gp==VEX_TRC_JMP_TINVAL;
793}
794
795HWord find_translation ( Addr64 guest_addr )
796{
797   Int i;
798   HWord __res;
799   if (0)
800      printf("find translation %p ... ", ULong_to_Ptr(guest_addr));
801   for (i = 0; i < trans_table_used; i++)
802     if (trans_table[i].base[0] == guest_addr)
803        break;
804   if (i == trans_table_used) {
805      if (0) printf("none\n");
806      return 0; /* not found */
807   }
808
809   /* Move this translation one step towards the front, so finding it
810      next time round is just that little bit cheaper. */
811   if (i > 2) {
812      VexGuestExtents tmpE = trans_table[i-1];
813      ULong*          tmpP = trans_tableP[i-1];
814      trans_table[i-1]  = trans_table[i];
815      trans_tableP[i-1] = trans_tableP[i];
816      trans_table[i] = tmpE;
817      trans_tableP[i] = tmpP;
818      i--;
819   }
820
821   __res = (HWord)trans_tableP[i];
822   if (0) printf("%p\n", (void*)__res);
823   return __res;
824}
825
826#define N_TRANSBUF 5000
827static UChar transbuf[N_TRANSBUF];
828void make_translation ( Addr64 guest_addr, Bool verbose )
829{
830   VexTranslateArgs   vta;
831   VexTranslateResult tres;
832   VexArchInfo vex_archinfo;
833   Int trans_used, i, ws_needed;
834
835   if (trans_table_used >= N_TRANS_TABLE
836       || trans_cache_used >= N_TRANS_CACHE-1000) {
837      /* If things are looking to full, just dump
838         all the translations. */
839      trans_cache_used = 0;
840      trans_table_used = 0;
841   }
842
843   assert(trans_table_used < N_TRANS_TABLE);
844   if (0)
845      printf("make translation %p\n", ULong_to_Ptr(guest_addr));
846
847   LibVEX_default_VexArchInfo(&vex_archinfo);
848   vex_archinfo.subarch = VexSubArch;
849   vex_archinfo.ppc_cache_line_szB = CacheLineSize;
850
851   /* */
852   vta.arch_guest       = VexArch;
853   vta.archinfo_guest   = vex_archinfo;
854   vta.arch_host        = VexArch;
855   vta.archinfo_host    = vex_archinfo;
856   vta.guest_bytes      = (UChar*)ULong_to_Ptr(guest_addr);
857   vta.guest_bytes_addr = (Addr64)guest_addr;
858   vta.guest_bytes_addr_noredir = (Addr64)guest_addr;
859   vta.chase_into_ok    = chase_into_ok;
860//   vta.guest_extents    = &vge;
861   vta.guest_extents    = &trans_table[trans_table_used];
862   vta.host_bytes       = transbuf;
863   vta.host_bytes_size  = N_TRANSBUF;
864   vta.host_bytes_used  = &trans_used;
865   vta.instrument1      = NULL;
866   vta.instrument2      = NULL;
867   vta.do_self_check    = False;
868   vta.traceflags       = verbose ? TEST_FLAGS : DEBUG_TRACE_FLAGS;
869   vta.dispatch         = NULL;
870
871   tres = LibVEX_Translate ( &vta );
872
873   assert(tres == VexTransOK);
874   ws_needed = (trans_used+7) / 8;
875   assert(ws_needed > 0);
876   assert(trans_cache_used + ws_needed < N_TRANS_CACHE);
877   n_translations_made++;
878
879   for (i = 0; i < trans_used; i++) {
880      HChar* dst = ((HChar*)(&trans_cache[trans_cache_used])) + i;
881      HChar* src = (HChar*)(&transbuf[i]);
882      *dst = *src;
883   }
884
885#if defined(__powerpc__)
886   invalidate_icache( &trans_cache[trans_cache_used], trans_used );
887#endif
888
889   trans_tableP[trans_table_used] = &trans_cache[trans_cache_used];
890   trans_table_used++;
891   trans_cache_used += ws_needed;
892}
893
894
895static Bool overlap ( Addr64 start, UInt len, VexGuestExtents* vge )
896{
897   Int i;
898   for (i = 0; i < vge->n_used; i++) {
899     if (vge->base[i]+vge->len[i] <= start
900         || vge->base[i] >= start+len) {
901       /* ok */
902     } else {
903        return True;
904     }
905   }
906   return False; /* no overlap */
907}
908
909static void dump_translations ( Addr64 start, UInt len )
910{
911   Int i, j;
912   j = 0;
913   for (i = 0; i < trans_table_used; i++) {
914      if (overlap(start, len, &trans_table[i])) {
915         /* do nothing */
916      } else {
917         assert(j <= i);
918         trans_table[j] = trans_table[i];
919         trans_tableP[j] = trans_tableP[i];
920	 j++;
921      }
922   }
923   assert(j >= 0 && j <= trans_table_used);
924   if (0) printf("dumped %d translations\n", trans_table_used - j);
925   trans_table_used = j;
926}
927
928
/* Basic-block count at which run_simulator switches back to the real
   CPU; set from the command line in main(). */
static ULong  stopAfter = 0;
/* Address of the test program's "entry" symbol; set in main(). */
static UChar* entryP    = NULL;
931
932
933__attribute__ ((noreturn))
934static
935void failure_exit ( void )
936{
937   fprintf(stdout, "VEX did failure_exit.  Bye.\n");
938   fprintf(stdout, "bb counter = %llu\n\n", n_bbs_done);
939   exit(1);
940}
941
942static
943void log_bytes ( HChar* bytes, Int nbytes )
944{
945   fwrite ( bytes, 1, nbytes, stdout );
946   fflush ( stdout );
947}
948
949
950/* run simulated code forever (it will exit by calling
951   serviceFn(0)). */
952static void run_simulator ( void )
953{
954   static Addr64 last_guest = 0;
955   Addr64 next_guest;
956   HWord next_host;
957   Bool need_inval;
958   while (1) {
959      next_guest = gst.GuestPC;
960
961      if (0)
962         printf("\nnext_guest: 0x%x\n", (UInt)next_guest);
963
964#if defined(__powerpc64__)
965      if (next_guest == Ptr_to_ULong( (void*)(*(ULong*)(&serviceFn)) )) {
966#else
967      if (next_guest == Ptr_to_ULong(&serviceFn)) {
968#endif
969         /* "do" the function call to serviceFn */
970#        if defined(__i386__)
971         {
972            HWord esp = gst.guest_ESP;
973            gst.guest_EIP = *(UInt*)(esp+0);
974            gst.guest_EAX = serviceFn( *(UInt*)(esp+4), *(UInt*)(esp+8) );
975            gst.guest_ESP = esp+4;
976            next_guest = gst.guest_EIP;
977         }
978#        elif defined(__x86_64__)
979         {
980            HWord esp = gst.guest_RSP;
981            gst.guest_RIP = *(UInt*)(esp+0);
982            gst.guest_RAX = serviceFn( gst.guest_RDI, gst.guest_RSI );
983            gst.guest_RSP = esp+8;
984            next_guest = gst.guest_RIP;
985         }
986#        elif defined(__powerpc__)
987         {
988            gst.guest_GPR3 = serviceFn( gst.guest_GPR3, gst.guest_GPR4 );
989            gst.guest_CIA  = gst.guest_LR;
990            next_guest     = gst.guest_CIA;
991         }
992#        else
993#        error "Unknown arch"
994#        endif
995      }
996
997      next_host = find_translation(next_guest);
998      if (next_host == 0) {
999         make_translation(next_guest,False);
1000         next_host = find_translation(next_guest);
1001         assert(next_host != 0);
1002      }
1003
1004      // Switchback
1005      if (n_bbs_done == stopAfter) {
1006         printf("---begin SWITCHBACK at bb:%llu---\n", n_bbs_done);
1007#if 1
1008         if (last_guest) {
1009            printf("\n*** Last run translation (bb:%llu):\n", n_bbs_done-1);
1010            make_translation(last_guest,True);
1011         }
1012#endif
1013#if 0
1014         if (next_guest) {
1015            printf("\n*** Current translation (bb:%llu):\n", n_bbs_done);
1016            make_translation(next_guest,True);
1017         }
1018#endif
1019         printf("---  end SWITCHBACK at bb:%llu ---\n", n_bbs_done);
1020         switchback();
1021         assert(0); /*NOTREACHED*/
1022      }
1023
1024      last_guest = next_guest;
1025      need_inval = run_translation(next_host);
1026      if (need_inval) {
1027#if defined(__powerpc__)
1028         dump_translations( (Addr64)gst.guest_TISTART, gst.guest_TILEN );
1029	 if (0) printf("dump translations done\n");
1030#endif
1031      }
1032   }
1033}
1034
1035
/* Print the command-line synopsis on stdout and terminate the
   process with status 1. */
static void usage ( void )
{
   FILE* out = stdout;

   fputs("usage: switchback #bbs\n", out);
   fputs("   - begins switchback for basic block #bbs\n", out);
   fputs("   - use -1 for largest possible run without switchback\n\n", out);
   exit(1);
}
1043
1044#if defined(__powerpc__)
1045
1046#if !defined(__powerpc64__) // ppc32
/* ppc32: get_R2() stores the current value of register r2 into
   saved_R2 (using r10 as scratch) and returns.  main() copies the
   saved value into the guest's GPR2. */
UInt saved_R2;
asm(
"get_R2:\n"
"   lis  %r10,saved_R2@ha\n"
"   stw  %r2,saved_R2@l(%r10)\n"
"   blr\n"
);
1054#else // ppc64
/* ppc64: snapshots of registers r2 and r13, filled in by get_R2()
   and get_R13() and copied by main() into the guest's GPR2 and
   GPR13. */
ULong saved_R2;
ULong saved_R13;
/* get_R2: build the 64-bit address of saved_R2 in r10, store r2
   there and return.  The symbol get_R2 is the ".opd" descriptor; the
   code is at .get_R2. */
asm(
".text\n"
"   .global get_R2\n"
"   .section \".opd\",\"aw\"\n"
"   .align 3\n"
"get_R2:\n"
"   .quad .get_R2,.TOC.@tocbase,0\n"
"   .previous\n"
"   .type .get_R2,@function\n"
"   .global  .get_R2\n"
".get_R2:\n"
"   lis    %r10,saved_R2@highest\n"
"   ori    %r10,%r10,saved_R2@higher\n"
"   rldicr %r10,%r10,32,31\n"
"   oris   %r10,%r10,saved_R2@h\n"
"   ori    %r10,%r10,saved_R2@l\n"
"   std    %r2,0(%r10)\n"
"   blr\n"
);
/* ppc64 get_R13: build the 64-bit address of saved_R13 in r10, store
   r13 there and return.  The symbol get_R13 is the ".opd"
   descriptor; the code is at .get_R13. */
asm(
".text\n"
"   .global get_R13\n"
"   .section \".opd\",\"aw\"\n"
"   .align 3\n"
"get_R13:\n"
"   .quad .get_R13,.TOC.@tocbase,0\n"
"   .previous\n"
"   .type .get_R13,@function\n"
"   .global  .get_R13\n"
".get_R13:\n"
"   lis    %r10,saved_R13@highest\n"
"   ori    %r10,%r10,saved_R13@higher\n"
"   rldicr %r10,%r10,32,31\n"
"   oris   %r10,%r10,saved_R13@h\n"
"   ori    %r10,%r10,saved_R13@l\n"
"   std    %r13,0(%r10)\n"
"   blr\n"
);
1095#endif
/* C prototypes for the assembly helpers above.  get_R13 is only
   defined, and only called, in the ppc64 build. */
extern void get_R2 ( void );
extern void get_R13 ( void );
1098#endif
1099
/* Program entry point.
   Expects exactly one argument: the basic-block count at which to
   switch back to the real CPU.  Initialises VEX, sets up the guest
   state as though "entry" (supplied by the linked-in test file) had
   just been called with serviceFn as its only argument, and starts
   the simulator.  The process normally ends inside the simulator
   (via serviceFn's EXIT request) or continues natively after a
   switchback. */
int main ( Int argc, HChar** argv )
{
   if (argc != 2)
      usage();

   /* The argument is not validated; the result of atoll is simply
      converted to ULong, so "-1" becomes the largest ULong value. */
   stopAfter = (ULong)atoll(argv[1]);

   /* entry point provided by the test file */
   extern void entry ( void*(*service)(int,int) );
   entryP = (UChar*)&entry;

   if (!entryP) {
      printf("switchback: can't find entry point\n");
      exit(1);
   }

   /* start from VEX's defaults, then override three settings */
   LibVEX_default_VexControl(&vcon);
   vcon.guest_max_insns=50;
   vcon.guest_chase_thresh=0;
   vcon.iropt_level=2;

   /* failure_exit and log_bytes are this file's callbacks */
   LibVEX_Init( failure_exit, log_bytes, 1, False, &vcon );
   LibVEX_Guest_initialise(&gst);

   /* set up as if a call to the entry point passing serviceFn as
      the one and only parameter */
   /* In every variant the guest stack pointer starts in the middle
      of gstack and the return address is a recognisable bogus
      value. */
#  if defined(__i386__)
   /* argument on the stack above the return address */
   gst.guest_EIP = (UInt)entryP;
   gst.guest_ESP = (UInt)&gstack[25000];
   *(UInt*)(gst.guest_ESP+4) = (UInt)serviceFn;
   *(UInt*)(gst.guest_ESP+0) = 0x12345678;
#  elif defined(__x86_64__)
   /* argument in RDI, 64-bit return address on the stack */
   gst.guest_RIP = (ULong)entryP;
   gst.guest_RSP = (ULong)&gstack[25000];
   gst.guest_RDI = (ULong)serviceFn;
   *(ULong*)(gst.guest_RSP+0) = 0x12345678AABBCCDDULL;
#  elif defined(__powerpc__)
   /* capture the host's r2 so the guest can start with the same value */
   get_R2();

#if !defined(__powerpc64__) // ppc32
   gst.guest_CIA   = (UInt)entryP;
   gst.guest_GPR1  = (UInt)&gstack[25000]; /* stack pointer */
   gst.guest_GPR3  = (UInt)serviceFn; /* param to entry */
   gst.guest_GPR2  = saved_R2;
   gst.guest_LR    = 0x12345678; /* bogus return address */
#else // ppc64
   get_R13();
   /* the initial CIA is the first doubleword found at the "entry"
      symbol (presumably the code address held in its function
      descriptor), not the symbol's own address */
   gst.guest_CIA   = * (ULong*)entryP;
   gst.guest_GPR1  = (ULong)&gstack[25000]; /* stack pointer */
   gst.guest_GPR3  = (ULong)serviceFn;      /* param to entry */
   gst.guest_GPR2  = saved_R2;
   gst.guest_GPR13 = saved_R13;
   gst.guest_LR    = 0x1234567812345678ULL; /* bogus return address */
//   printf("setting CIA to %p\n", (void*)gst.guest_CIA);
#endif

#  else
#  error "Unknown arch"
#  endif

   printf("\n---START---\n");

   /* Change the 1 to 0 to call the test program directly on the real
      CPU instead of simulating it. */
#if 1
   run_simulator();
#else
   ( (void(*)(HWord(*)(HWord,HWord))) entryP ) (serviceFn);
#endif


   return 0;
}
1170