1
2/* HOW TO USE
3
413 Dec '05 - Linker no longer used (apart from mymalloc)
5Simply compile and link switchback.c with test_xxx.c,
6e.g. for ppc64:
7$ (cd .. && make EXTRA_CFLAGS="-m64" libvex_ppc64_linux.a) && gcc -m64 -mregnames -Wall -Wshadow -Wno-long-long -Winline -O -g -o switchback switchback.c linker.c ../libvex_ppc64_linux.a test_xxx.c
8
9Test file test_xxx.c must have an entry point called "entry",
10which expects to take a single argument which is a function pointer
11(to "serviceFn").
12
13Test file may not reference any other symbols.
14
15NOTE: POWERPC: it is critical, when using this on ppc, to set
16CacheLineSize to the right value.  Values we currently know of:
17
18   imac (G3):   32
19   G5 (ppc970): 128
20
21ARM64:
22  (cd .. && make -f Makefile-gcc libvex-arm64-linux.a) \
23     && $CC -Wall -O -g -o switchback switchback.c linker.c \
24     ../libvex-arm64-linux.a test_emfloat.c
25*/
26
27#include <stdio.h>
28#include <assert.h>
29#include <stdlib.h>
30#include <string.h>
31#include <sys/types.h>
32#include <sys/stat.h>
33#include <unistd.h>
34
35#include "../pub/libvex_basictypes.h"
36#include "../pub/libvex_guest_x86.h"
37#include "../pub/libvex_guest_amd64.h"
38#include "../pub/libvex_guest_ppc32.h"
39#include "../pub/libvex_guest_ppc64.h"
40#include "../pub/libvex_guest_arm64.h"
41#include "../pub/libvex.h"
42#include "../pub/libvex_trc_values.h"
43#include "linker.h"
44
/* Run statistics: total simulated basic blocks, and how many
   translations have been made so far. */
static ULong n_bbs_done = 0;
static Int   n_translations_made = 0;
47
48
/* Per-architecture configuration: map the generic names used below
   onto the concrete guest-state type, initialiser, VexArch value,
   guest program-counter field and icache line size for the build
   target. */
#if defined(__i386__)
#  define VexGuestState             VexGuestX86State
#  define LibVEX_Guest_initialise   LibVEX_GuestX86_initialise
#  define VexArch                   VexArchX86
#  define VexSubArch                VexSubArchX86_sse1
#  define GuestPC                   guest_EIP
#  define CacheLineSize             0/*irrelevant*/

#elif defined(__aarch64__) && !defined(__arm__)
#  define VexGuestState             VexGuestARM64State
#  define LibVEX_Guest_initialise   LibVEX_GuestARM64_initialise
#  define VexArch                   VexArchARM64
#  define VexSubArch                VexSubArch_NONE
#  define GuestPC                   guest_PC
#  define CacheLineSize             0/*irrelevant*/

#else
#   error "Unknown arch"
#endif

/* Bit assignments for the traceflags word given to LibVEX_Translate: */
/* 7: show conversion into IR */
/* 6: show after initial opt */
/* 5: show after instrumentation */
/* 4: show after second opt */
/* 3: show after tree building */
/* 2: show selected insns */
/* 1: show after reg-alloc */
/* 0: show final assembly */
#define TEST_FLAGS ((1<<7)|(1<<3)|(1<<2)|(1<<1)|(1<<0))
/* All bits zero: tracing disabled during normal (non-verbose) runs. */
#define DEBUG_TRACE_FLAGS ((0<<7)|(0<<6)|(0<<5)|(0<<4)| \
                           (0<<3)|(0<<2)|(0<<1)|(0<<0))
80
/* Integer type wide enough to hold a host address. */
typedef  unsigned long int  Addr;


/* guest state */
ULong gstack[64000] __attribute__((aligned(16)));  /* simulated guest stack */
VexGuestState gst;   /* the simulated CPU state */
VexControl vcon;     /* VEX translation controls */

/* only used for the switchback transition */
/* i386:  helper1 = &gst, helper2 = %EFLAGS */
/* amd64: helper1 = &gst, helper2 = %EFLAGS */
/* ppc32: helper1 = &gst, helper2 = %CR, helper3 = %XER */
/* arm64: helper1 = &gst, helper2 = 32x0:NZCV:28x0 */
HWord sb_helper1 = 0;
HWord sb_helper2 = 0;
HWord sb_helper3 = 0;

/* translation cache */
#define N_TRANS_CACHE 1000000
#define N_TRANS_TABLE 10000

ULong trans_cache[N_TRANS_CACHE];            /* host code, in 8-byte units */
VexGuestExtents trans_table [N_TRANS_TABLE]; /* guest extents, per translation */
ULong*          trans_tableP[N_TRANS_TABLE]; /* host code address, per translation */

Int trans_cache_used = 0;
Int trans_table_used = 0;
108
109static Bool chase_into_ok ( void* opaque, Addr64 dst ) {
110   return False;
111}
112
113static UInt needs_self_check ( void* opaque, VexGuestExtents* vge ) {
114   return 0;
115}
116
117
118/* For providing services. */
119static HWord serviceFn ( HWord arg1, HWord arg2 )
120{
121   switch (arg1) {
122      case 0: /* EXIT */
123         printf("---STOP---\n");
124         printf("serviceFn:EXIT\n");
125	 printf("%llu bbs simulated\n", n_bbs_done);
126	 printf("%d translations made, %d tt bytes\n",
127                n_translations_made, 8*trans_cache_used);
128         exit(0);
129      case 1: /* PUTC */
130         putchar(arg2);
131         return 0;
132      case 2: /* MALLOC */
133         return (HWord)malloc(arg2);
134      case 3: /* FREE */
135         free((void*)arg2);
136         return 0;
137      default:
138         assert(0);
139   }
140}
141
142
// needed for arm64 ?
/* Clean 'nbytes' of freshly written code at 'ptr' out of the D-cache
   and invalidate the corresponding I-cache lines, so the new
   instructions become visible to instruction fetch.  The cache
   maintenance instructions used here exist only on arm64, so the body
   must be conditionally compiled: previously it was compiled
   unconditionally and broke non-arm64 builds at assembly time.  On
   other targets this is a no-op (and it is never called there). */
static void invalidate_icache(void *ptr, unsigned long nbytes)
{
#if defined(__aarch64__)
   // This function, invalidate_icache, for arm64_linux,
   // is copied from
   // https://github.com/armvixl/vixl/blob/master/src/a64/cpu-a64.cc
   // which has the following copyright notice:
   /*
   Copyright 2013, ARM Limited
   All rights reserved.

   Redistribution and use in source and binary forms, with or without
   modification, are permitted provided that the following conditions are met:

   * Redistributions of source code must retain the above copyright notice,
     this list of conditions and the following disclaimer.
   * Redistributions in binary form must reproduce the above copyright notice,
     this list of conditions and the following disclaimer in the documentation
     and/or other materials provided with the distribution.
   * Neither the name of ARM Limited nor the names of its contributors may be
     used to endorse or promote products derived from this software without
     specific prior written permission.

   THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS CONTRIBUTORS "AS IS" AND
   ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
   WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
   DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE
   FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
   DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
   SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
   CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
   OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
   OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
   */

   // Ask what the I and D line sizes are
   UInt cache_type_register;
   // Copy the content of the cache type register to a core register.
   __asm__ __volatile__ ("mrs %[ctr], ctr_el0" // NOLINT
                         : [ctr] "=r" (cache_type_register));

   const Int kDCacheLineSizeShift = 16;
   const Int kICacheLineSizeShift = 0;
   const UInt kDCacheLineSizeMask = 0xf << kDCacheLineSizeShift;
   const UInt kICacheLineSizeMask = 0xf << kICacheLineSizeShift;

   // The cache type register holds the size of the I and D caches as a power of
   // two.
   const UInt dcache_line_size_power_of_two =
       (cache_type_register & kDCacheLineSizeMask) >> kDCacheLineSizeShift;
   const UInt icache_line_size_power_of_two =
       (cache_type_register & kICacheLineSizeMask) >> kICacheLineSizeShift;

   const UInt dcache_line_size_ = 1 << dcache_line_size_power_of_two;
   const UInt icache_line_size_ = 1 << icache_line_size_power_of_two;

   Addr start = (Addr)ptr;
   // Sizes will be used to generate a mask big enough to cover a pointer.
   Addr dsize = (Addr)dcache_line_size_;
   Addr isize = (Addr)icache_line_size_;

   // Cache line sizes are always a power of 2.
   Addr dstart = start & ~(dsize - 1);
   Addr istart = start & ~(isize - 1);
   Addr end    = start + nbytes;

   __asm__ __volatile__ (
     // Clean every line of the D cache containing the target data.
     "0: \n\t"
     // dc : Data Cache maintenance
     // c : Clean
     // va : by (Virtual) Address
     // u : to the point of Unification
     // The point of unification for a processor is the point by which the
     // instruction and data caches are guaranteed to see the same copy of a
     // memory location. See ARM DDI 0406B page B2-12 for more information.
     "dc cvau, %[dline] \n\t"
     "add %[dline], %[dline], %[dsize] \n\t"
     "cmp %[dline], %[end] \n\t"
     "b.lt 0b \n\t"
     // Barrier to make sure the effect of the code above is visible to the rest
     // of the world.
     // dsb : Data Synchronisation Barrier
     // ish : Inner SHareable domain
     // The point of unification for an Inner Shareable shareability domain is
     // the point by which the instruction and data caches of all the processors
     // in that Inner Shareable shareability domain are guaranteed to see the
     // same copy of a memory location. See ARM DDI 0406B page B2-12 for more
     // information.
     "dsb ish \n\t"
     // Invalidate every line of the I cache containing the target data.
     "1: \n\t"
     // ic : instruction cache maintenance
     // i : invalidate
     // va : by address
     // u : to the point of unification
     "ic ivau, %[iline] \n\t"
     "add %[iline], %[iline], %[isize] \n\t"
     "cmp %[iline], %[end] \n\t"
     "b.lt 1b \n\t"
     // Barrier to make sure the effect of the code above is visible to the rest
     // of the world.
     "dsb ish \n\t"
     // Barrier to ensure any prefetching which happened before this code is
     // discarded.
     // isb : Instruction Synchronisation Barrier
     "isb \n\t"
     : [dline] "+r" (dstart),
       [iline] "+r" (istart)
     : [dsize] "r" (dsize),
       [isize] "r" (isize),
       [end] "r" (end)
     // This code does not write to memory but without the dependency gcc might
     // move this code before the code is generated.
     : "cc", "memory"
   );

#else
   /* Not arm64: nothing to do.  x86-family hosts keep their I- and
      D-caches coherent in hardware. */
   (void)ptr;
   (void)nbytes;
#endif
}
261
262
263/* -------------------- */
264/* continue execution on the real CPU (never returns) */
265
266#if defined(__i386__)
267
268extern void switchback_asm(void);
269asm(
270"switchback_asm:\n"
271"   movl sb_helper1, %eax\n"  // eax = guest state ptr
272"   movl  16(%eax), %esp\n"   // switch stacks
273"   pushl 56(%eax)\n"         // push continuation addr
274"   movl sb_helper2, %ebx\n"  // get eflags
275"   pushl %ebx\n"             // eflags:CA
276"   pushl 0(%eax)\n"          //  EAX:eflags:CA
277"   movl 4(%eax), %ecx\n"
278"   movl 8(%eax), %edx\n"
279"   movl 12(%eax), %ebx\n"
280"   movl 20(%eax), %ebp\n"
281"   movl 24(%eax), %esi\n"
282"   movl 28(%eax), %edi\n"
283"   popl %eax\n"
284"   popfl\n"
285"   ret\n"
286);
287void switchback ( void )
288{
289   sb_helper1 = (HWord)&gst;
290   sb_helper2 = LibVEX_GuestX86_get_eflags(&gst);
291   switchback_asm(); // never returns
292}
293
#elif defined(__aarch64__)

/* Exit trampoline for arm64: x0 holds the guest state pointer, x1 the
   guest NZCV value.  Restores NZCV, TPIDR_EL0, SP, all vector regs and
   all integer regs from the guest state, then falls into a nop which
   the caller (switchback, below) patches into a relative branch to the
   guest continuation address. */
extern void switchback_asm(HWord x0_gst, HWord x1_pstate);
asm(
"switchback_asm:"
/* Merge the guest NZCV bits (x1) into the real PSTATE. */
"   mrs x30, nzcv"  "\n"
"   and x30, x30, #0xFFFFFFFF0FFFFFFF"  "\n"
"   and x1,  x1,  #0x00000000F0000000"  "\n"
"   orr x30, x30, x1"  "\n"
"   msr nzcv, x30"  "\n"

/* Restore the guest thread pointer. */
"   ldr x30, [x0, #16 + 8*37]"  "\n"
"   msr tpidr_el0, x30"  "\n"

/* Switch to the guest stack. */
"   ldr x30, [x0, #16 + 8*31]"  "\n"
"   mov sp,  x30"  "\n"

/* Restore the 32 vector registers from guest_Q0 onwards. */
"   add x30, x0, #(16 + 8*38 + 16*0)"  "\n"
"   ldr q0,  [x30], #16"   "\n"
"   ldr q1,  [x30], #16"   "\n"
"   ldr q2,  [x30], #16"   "\n"
"   ldr q3,  [x30], #16"   "\n"
"   ldr q4,  [x30], #16"   "\n"
"   ldr q5,  [x30], #16"   "\n"
"   ldr q6,  [x30], #16"   "\n"
"   ldr q7,  [x30], #16"   "\n"
"   ldr q8,  [x30], #16"   "\n"
"   ldr q9,  [x30], #16"   "\n"
"   ldr q10, [x30], #16"   "\n"
"   ldr q11, [x30], #16"   "\n"
"   ldr q12, [x30], #16"   "\n"
"   ldr q13, [x30], #16"   "\n"
"   ldr q14, [x30], #16"   "\n"
"   ldr q15, [x30], #16"   "\n"
"   ldr q16, [x30], #16"   "\n"
"   ldr q17, [x30], #16"   "\n"
"   ldr q18, [x30], #16"   "\n"
"   ldr q19, [x30], #16"   "\n"
"   ldr q20, [x30], #16"   "\n"
"   ldr q21, [x30], #16"   "\n"
"   ldr q22, [x30], #16"   "\n"
"   ldr q23, [x30], #16"   "\n"
"   ldr q24, [x30], #16"   "\n"
"   ldr q25, [x30], #16"   "\n"
"   ldr q26, [x30], #16"   "\n"
"   ldr q27, [x30], #16"   "\n"
"   ldr q28, [x30], #16"   "\n"
"   ldr q29, [x30], #16"   "\n"
"   ldr q30, [x30], #16"   "\n"
"   ldr q31, [x30], #16"   "\n"

/* Restore the integer registers, x0 last since it is the base. */
"   ldr x30, [x0, #16+8*30]"  "\n"
"   ldr x29, [x0, #16+8*29]"  "\n"
"   ldr x28, [x0, #16+8*28]"  "\n"
"   ldr x27, [x0, #16+8*27]"  "\n"
"   ldr x26, [x0, #16+8*26]"  "\n"
"   ldr x25, [x0, #16+8*25]"  "\n"
"   ldr x24, [x0, #16+8*24]"  "\n"
"   ldr x23, [x0, #16+8*23]"  "\n"
"   ldr x22, [x0, #16+8*22]"  "\n"
"   ldr x21, [x0, #16+8*21]"  "\n"
"   ldr x20, [x0, #16+8*20]"  "\n"
"   ldr x19, [x0, #16+8*19]"  "\n"
"   ldr x18, [x0, #16+8*18]"  "\n"
"   ldr x17, [x0, #16+8*17]"  "\n"
"   ldr x16, [x0, #16+8*16]"  "\n"
"   ldr x15, [x0, #16+8*15]"  "\n"
"   ldr x14, [x0, #16+8*14]"  "\n"
"   ldr x13, [x0, #16+8*13]"  "\n"
"   ldr x12, [x0, #16+8*12]"  "\n"
"   ldr x11, [x0, #16+8*11]"  "\n"
"   ldr x10, [x0, #16+8*10]"  "\n"
"   ldr x9,  [x0, #16+8*9]"   "\n"
"   ldr x8,  [x0, #16+8*8]"   "\n"
"   ldr x7,  [x0, #16+8*7]"   "\n"
"   ldr x6,  [x0, #16+8*6]"   "\n"
"   ldr x5,  [x0, #16+8*5]"   "\n"
"   ldr x4,  [x0, #16+8*4]"   "\n"
"   ldr x3,  [x0, #16+8*3]"   "\n"
"   ldr x2,  [x0, #16+8*2]"   "\n"
"   ldr x1,  [x0, #16+8*1]"   "\n"
"   ldr x0,  [x0, #16+8*0]"   "\n"

"nop_start_point:"            "\n"
"   nop"  "\n" // this will be converted into a relative jump
"nop_end_point:"              "\n"
);

/* Labels bounding the patchable nop, taken as function addresses. */
extern void nop_start_point(void);
extern void nop_end_point(void);
384
/* Leave the simulator: copy switchback_asm into freshly allocated
   memory, patch its trailing nop into a relative branch to the guest
   PC, flush the icache, and jump to the copy.  Never returns. */
void switchback ( void )
{
  /* The asm trampoline hard-codes guest-state offsets; check them. */
  assert(offsetof(VexGuestARM64State, guest_X0)  == 16 + 8*0);
  assert(offsetof(VexGuestARM64State, guest_X30) == 16 + 8*30);
  assert(offsetof(VexGuestARM64State, guest_SP)  == 16 + 8*31);
  assert(offsetof(VexGuestARM64State, guest_TPIDR_EL0) == 16 + 8*37);
  assert(offsetof(VexGuestARM64State, guest_Q0)  == 16 + 8*38 + 16*0);

  HWord arg0 = (HWord)&gst;
  HWord arg1 = LibVEX_GuestARM64_get_nzcv(&gst);

  /* Copy the entire switchback_asm procedure into writable and
     executable memory. */

  UChar* sa_start     = (UChar*)&switchback_asm;
  UChar* sa_nop_start = (UChar*)&nop_start_point;
  UChar* sa_end       = (UChar*)&nop_end_point;

  Int i;
  Int nbytes       = sa_end - sa_start;
  Int off_nopstart = sa_nop_start - sa_start;
  if (0)
     printf("nbytes = %d, nopstart = %d\n", nbytes, off_nopstart);

   /* copy it into mallocville */
   /* NOTE(review): assumes mymalloc (from linker.c) returns executable
      memory -- confirm against linker.c. */
   UChar* copy = mymalloc(nbytes);
   assert(copy);
   for (i = 0; i < nbytes; i++)
      copy[i] = sa_start[i];

   UInt* p = (UInt*)(&copy[off_nopstart]);

   Addr64 addr_of_nop = (Addr64)p;
   Addr64 where_to_go = gst.guest_PC;
   Long   diff = ((Long)where_to_go) - ((Long)addr_of_nop);

   if (0) {
     printf("addr of first nop = 0x%llx\n", addr_of_nop);
     printf("where to go       = 0x%llx\n", where_to_go);
     printf("diff = 0x%llx\n", diff);
   }

   /* An A64 unconditional branch reaches +/- 128MB (0x8000000 bytes). */
   if (diff < -0x8000000LL || diff >= 0x8000000LL) {
     // we're hosed.  Give up
     printf("hosed -- offset too large\n");
     assert(0);
   }

   /* stay sane ... */
   assert(p[0] == 0xd503201f); /* nop */

   /* branch to diff */
   p[0] = 0x14000000 | ((diff >> 2) & 0x3FFFFFF);

   invalidate_icache( copy, nbytes );

   ( (void(*)(HWord,HWord))copy )(arg0, arg1);
}
443
444#else
445# error "Unknown plat"
446#endif
447
448
449
450/* -------------------- */
451// f    holds is the host code address
452// gp   holds the guest state pointer to use
453// res  is to hold the result.  Or some such.
454static HWord block[2]; // f, gp;
455extern HWord run_translation_asm(void);
456
457extern void disp_chain_assisted(void);
458
#if defined(__aarch64__)
/* Save callee-saved registers, load the guest state pointer into x21
   and jump to the translation.  Generated code ends by jumping to
   disp_chain_assisted with the trc in x21; that label restores the
   saved registers and returns the trc to the C caller. */
asm(
"run_translation_asm:"            "\n"
"   stp  x29, x30, [sp, #-16]!"   "\n"
"   stp  x27, x28, [sp, #-16]!"   "\n"
"   stp  x25, x26, [sp, #-16]!"   "\n"
"   stp  x23, x24, [sp, #-16]!"   "\n"
"   stp  x21, x22, [sp, #-16]!"   "\n"
"   stp  x19, x20, [sp, #-16]!"   "\n"
"   stp  x0,  xzr, [sp, #-16]!"   "\n"
"   adrp x0, block"               "\n"
"   add  x0, x0, :lo12:block"     "\n"
"   ldr  x21, [x0, #8]"           "\n"  // load GSP
"   ldr  x1,  [x0, #0]"           "\n"  // Host address
"   br   x1"                 "\n"  // go (we wind up at disp_chain_assisted)

"disp_chain_assisted:"            "\n" // x21 holds the trc.  Return it.
"   mov  x1, x21" "\n"
    /* Restore int regs, but not x1. */
"   ldp  x0,  xzr, [sp], #16"    "\n"
"   ldp  x19, x20, [sp], #16"    "\n"
"   ldp  x21, x22, [sp], #16"    "\n"
"   ldp  x23, x24, [sp], #16"    "\n"
"   ldp  x25, x26, [sp], #16"    "\n"
"   ldp  x27, x28, [sp], #16"    "\n"
"   ldp  x29, x30, [sp], #16"    "\n"
"   mov  x0, x1"                 "\n"
"   ret"                         "\n"
);

#elif defined(__i386__)

/* NOTE(review): this i386 variant still references the symbols 'gp',
   'f' and 'res', which no longer exist (they were replaced by the
   'block' array above) -- an i386 build would fail to link.  It also
   never defines disp_chain_assisted.  Looks stale; verify before use. */
asm(
"run_translation_asm:\n"
"   pushal\n"
"   movl gp, %ebp\n"
"   movl f, %eax\n"
"   call *%eax\n"
"   movl %eax, res\n"
"   popal\n"
"   ret\n"
);

#else
# error "Unknown arch"
#endif
505
506
507/* Run a translation at host address 'translation' and return the TRC.
508*/
509HWord run_translation ( HWord translation )
510{
511   if (0 && DEBUG_TRACE_FLAGS) {
512      printf(" run translation %p\n", (void*)translation );
513      printf(" simulated bb: %llu\n", n_bbs_done);
514   }
515   block[0] = translation;
516   block[1] = (HWord)&gst;
517   HWord trc = run_translation_asm();
518   n_bbs_done ++;
519   return trc;
520}
521
522HWord find_translation ( Addr64 guest_addr )
523{
524   Int i;
525   HWord __res;
526   if (0)
527      printf("find translation %p ... ", ULong_to_Ptr(guest_addr));
528   for (i = 0; i < trans_table_used; i++)
529     if (trans_table[i].base[0] == guest_addr)
530        break;
531   if (i == trans_table_used) {
532      if (0) printf("none\n");
533      return 0; /* not found */
534   }
535
536   /* Move this translation one step towards the front, so finding it
537      next time round is just that little bit cheaper. */
538   if (i > 2) {
539      VexGuestExtents tmpE = trans_table[i-1];
540      ULong*          tmpP = trans_tableP[i-1];
541      trans_table[i-1]  = trans_table[i];
542      trans_tableP[i-1] = trans_tableP[i];
543      trans_table[i] = tmpE;
544      trans_tableP[i] = tmpP;
545      i--;
546   }
547
548   __res = (HWord)trans_tableP[i];
549   if (0) printf("%p\n", (void*)__res);
550   return __res;
551}
552
553#define N_TRANSBUF 5000
554static UChar transbuf[N_TRANSBUF];
555void make_translation ( Addr64 guest_addr, Bool verbose )
556{
557   VexTranslateArgs   vta;
558   VexTranslateResult tres;
559   VexArchInfo vex_archinfo;
560   Int trans_used, i, ws_needed;
561
562   memset(&vta, 0, sizeof(vta));
563   memset(&tres, 0, sizeof(tres));
564   memset(&vex_archinfo, 0, sizeof(vex_archinfo));
565
566   if (trans_table_used >= N_TRANS_TABLE
567       || trans_cache_used >= N_TRANS_CACHE-1000) {
568      /* If things are looking to full, just dump
569         all the translations. */
570      trans_cache_used = 0;
571      trans_table_used = 0;
572   }
573
574   assert(trans_table_used < N_TRANS_TABLE);
575   if (0)
576      printf("make translation %p\n", ULong_to_Ptr(guest_addr));
577
578   LibVEX_default_VexArchInfo(&vex_archinfo);
579   //vex_archinfo.subarch = VexSubArch;
580   //vex_archinfo.ppc_icache_line_szB = CacheLineSize;
581
582   /* */
583   vta.arch_guest       = VexArch;
584   vta.archinfo_guest   = vex_archinfo;
585   vta.arch_host        = VexArch;
586   vta.archinfo_host    = vex_archinfo;
587   vta.guest_bytes      = (UChar*)ULong_to_Ptr(guest_addr);
588   vta.guest_bytes_addr = (Addr64)guest_addr;
589   vta.chase_into_ok    = chase_into_ok;
590//   vta.guest_extents    = &vge;
591   vta.guest_extents    = &trans_table[trans_table_used];
592   vta.host_bytes       = transbuf;
593   vta.host_bytes_size  = N_TRANSBUF;
594   vta.host_bytes_used  = &trans_used;
595   vta.instrument1      = NULL;
596   vta.instrument2      = NULL;
597   vta.needs_self_check = needs_self_check;
598   vta.traceflags       = verbose ? TEST_FLAGS : DEBUG_TRACE_FLAGS;
599
600   vta.disp_cp_chain_me_to_slowEP = NULL; //disp_chain_fast;
601   vta.disp_cp_chain_me_to_fastEP = NULL; //disp_chain_slow;
602   vta.disp_cp_xindir             = NULL; //disp_chain_indir;
603   vta.disp_cp_xassisted          = disp_chain_assisted;
604
605   vta.addProfInc       = False;
606
607   tres = LibVEX_Translate ( &vta );
608
609   assert(tres.status == VexTransOK);
610   assert(tres.offs_profInc == -1);
611
612   ws_needed = (trans_used+7) / 8;
613   assert(ws_needed > 0);
614   assert(trans_cache_used + ws_needed < N_TRANS_CACHE);
615   n_translations_made++;
616
617   for (i = 0; i < trans_used; i++) {
618      HChar* dst = ((HChar*)(&trans_cache[trans_cache_used])) + i;
619      HChar* src = (HChar*)(&transbuf[i]);
620      *dst = *src;
621   }
622
623#if defined(__aarch64__)
624   invalidate_icache( &trans_cache[trans_cache_used], trans_used );
625#endif
626
627   trans_tableP[trans_table_used] = &trans_cache[trans_cache_used];
628   trans_table_used++;
629   trans_cache_used += ws_needed;
630}
631
632
633__attribute__((unused))
634static Bool overlap ( Addr64 start, UInt len, VexGuestExtents* vge )
635{
636   Int i;
637   for (i = 0; i < vge->n_used; i++) {
638     if (vge->base[i]+vge->len[i] <= start
639         || vge->base[i] >= start+len) {
640       /* ok */
641     } else {
642        return True;
643     }
644   }
645   return False; /* no overlap */
646}
647
/* Switch back to native execution once n_bbs_done reaches stopAfter.
   entryP is the guest entry point (set in main). */
static ULong  stopAfter = 0;
static UChar* entryP    = NULL;
650
651
652__attribute__ ((noreturn))
653static
654void failure_exit ( void )
655{
656   fprintf(stdout, "VEX did failure_exit.  Bye.\n");
657   fprintf(stdout, "bb counter = %llu\n\n", n_bbs_done);
658   exit(1);
659}
660
661static
662void log_bytes ( HChar* bytes, Int nbytes )
663{
664   fwrite ( bytes, 1, nbytes, stdout );
665   fflush ( stdout );
666}
667
668
/* run simulated code forever (it will exit by calling
   serviceFn(0)).  Main simulation loop: fetch (or make) a translation
   for the current guest PC, run it, repeat.  Calls to serviceFn are
   intercepted and performed natively.  When the basic-block counter
   hits stopAfter, control is handed back to the real CPU. */
static void run_simulator ( void )
{
   static Addr64 last_guest = 0;
   Addr64 next_guest;
   HWord next_host;
   while (1) {
      next_guest = gst.GuestPC;

      if (0)
         printf("\nnext_guest: 0x%x\n", (UInt)next_guest);

      /* Intercept calls to serviceFn: emulate the call/return
         sequence of the guest ABI around a native call. */
      if (next_guest == Ptr_to_ULong(&serviceFn)) {

         /* "do" the function call to serviceFn */
#        if defined(__i386__)
         {
            /* Stack holds: return addr, arg1, arg2.  Pop the return
               address and deliver the result in EAX. */
            HWord esp = gst.guest_ESP;
            gst.guest_EIP = *(UInt*)(esp+0);
            gst.guest_EAX = serviceFn( *(UInt*)(esp+4), *(UInt*)(esp+8) );
            gst.guest_ESP = esp+4;
            next_guest = gst.guest_EIP;
         }
#        elif defined(__aarch64__)
         {
            /* Args in X0/X1, result in X0, return address in X30. */
            gst.guest_X0 = serviceFn( gst.guest_X0, gst.guest_X1 );
            gst.guest_PC = gst.guest_X30;
            next_guest   = gst.guest_PC;
         }
#        else
#        error "Unknown arch"
#        endif
      }

      /* Get a translation for next_guest, making one if needed. */
      next_host = find_translation(next_guest);
      if (next_host == 0) {
         make_translation(next_guest,False);
         next_host = find_translation(next_guest);
         assert(next_host != 0);
      }

      // Switchback
      if (n_bbs_done == stopAfter) {
         printf("---begin SWITCHBACK at bb:%llu---\n", n_bbs_done);
#if 1
         /* Re-translate the previous block verbosely, for inspection. */
         if (last_guest) {
            printf("\n*** Last run translation (bb:%llu):\n", n_bbs_done-1);
            make_translation(last_guest,True);
         }
#endif
#if 0
         if (next_guest) {
            printf("\n*** Current translation (bb:%llu):\n", n_bbs_done);
            make_translation(next_guest,True);
         }
#endif
         printf("---  end SWITCHBACK at bb:%llu ---\n", n_bbs_done);
         switchback();
         assert(0); /*NOTREACHED*/
      }

      last_guest = next_guest;
      HWord trc = run_translation(next_host);
      if (0) printf("------- trc = %lu\n", trc);
      /* Only plain block-to-block transfers are expected here. */
      if (trc != VEX_TRC_JMP_BORING) {
        if (1) printf("------- trc = %lu\n", trc);
      }
      assert(trc == VEX_TRC_JMP_BORING);
   }
}
740
741
/* Print command-line help and terminate unsuccessfully. */
static void usage ( void )
{
   fputs("usage: switchback #bbs\n", stdout);
   fputs("   - begins switchback for basic block #bbs\n", stdout);
   fputs("   - use -1 for largest possible run without switchback\n\n", stdout);
   exit(1);
}
749
750
/* Entry: parse the bb count, initialise VEX and the guest state as if
   the test program's "entry" had just been called with serviceFn as
   its only argument, then run the simulator. */
int main ( Int argc, HChar** argv )
{
   if (argc != 2)
      usage();

   stopAfter = (ULong)atoll(argv[1]);

   /* "entry" is supplied by the test_xxx.c file linked in. */
   extern void entry ( void*(*service)(int,int) );
   entryP = (UChar*)&entry;

   if (!entryP) {
      printf("switchback: can't find entry point\n");
      exit(1);
   }

   LibVEX_default_VexControl(&vcon);
   vcon.guest_max_insns=50 - 49;    /* i.e. 1 insn per superblock */
   vcon.guest_chase_thresh=0;
   vcon.iropt_level=2;

   LibVEX_Init( failure_exit, log_bytes, 1, False, &vcon );
   LibVEX_Guest_initialise(&gst);
   gst.host_EvC_COUNTER  = 999999999; // so we should never get an exit
   gst.host_EvC_FAILADDR = 0x5a5a5a5a5a5a5a5a;

   /* set up as if a call to the entry point passing serviceFn as
      the one and only parameter */
#  if defined(__i386__)
   /* Fake call frame: arg at ESP+4, dummy return address at ESP+0. */
   gst.guest_EIP = (UInt)entryP;
   gst.guest_ESP = (UInt)&gstack[32000];
   *(UInt*)(gst.guest_ESP+4) = (UInt)serviceFn;
   *(UInt*)(gst.guest_ESP+0) = 0x12345678;

#  elif defined(__aarch64__)
   /* Arg in X0; give the guest our real thread pointer. */
   gst.guest_PC = (ULong)entryP;
   gst.guest_SP = (ULong)&gstack[32000];
   gst.guest_X0 = (ULong)serviceFn;
   HWord tpidr_el0 = 0;
   __asm__ __volatile__("mrs %0, tpidr_el0" : "=r"(tpidr_el0));
   gst.guest_TPIDR_EL0 = tpidr_el0;

#  else
#  error "Unknown arch"
#  endif

   printf("\n---START---\n");

#if 1
   run_simulator();
#else
   /* Direct native run of the test program, for comparison. */
   ( (void(*)(HWord(*)(HWord,HWord))) entryP ) (serviceFn);
#endif


   return 0;
}
807