m_translate.c revision 9bea4c13fca0e3bb4b719dcb3ed63d47d479294e
1
2/*--------------------------------------------------------------------*/
3/*--- Interface to LibVEX_Translate, and the SP-update pass        ---*/
4/*---                                                m_translate.c ---*/
5/*--------------------------------------------------------------------*/
6
7/*
8   This file is part of Valgrind, a dynamic binary instrumentation
9   framework.
10
11   Copyright (C) 2000-2010 Julian Seward
12      jseward@acm.org
13
14   This program is free software; you can redistribute it and/or
15   modify it under the terms of the GNU General Public License as
16   published by the Free Software Foundation; either version 2 of the
17   License, or (at your option) any later version.
18
19   This program is distributed in the hope that it will be useful, but
20   WITHOUT ANY WARRANTY; without even the implied warranty of
21   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
22   General Public License for more details.
23
24   You should have received a copy of the GNU General Public License
25   along with this program; if not, write to the Free Software
26   Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA
27   02111-1307, USA.
28
29   The GNU General Public License is contained in the file COPYING.
30*/
31
32#include "pub_core_basics.h"
33#include "pub_core_vki.h"
34#include "pub_core_aspacemgr.h"
35
36#include "pub_core_machine.h"    // VG_(fnptr_to_fnentry)
37                                 // VG_(get_SP)
38                                 // VG_(machine_get_VexArchInfo)
39#include "pub_core_libcbase.h"
40#include "pub_core_libcassert.h"
41#include "pub_core_libcprint.h"
42#include "pub_core_options.h"
43
44#include "pub_core_debuginfo.h"  // VG_(get_fnname_w_offset)
45#include "pub_core_redir.h"      // VG_(redir_do_lookup)
46
47#include "pub_core_signals.h"    // VG_(synth_fault_{perms,mapping}
48#include "pub_core_stacks.h"     // VG_(unknown_SP_update)()
49#include "pub_core_tooliface.h"  // VG_(tdict)
50
51#include "pub_core_translate.h"
52#include "pub_core_transtab.h"
53#include "pub_core_dispatch.h" // VG_(run_innerloop__dispatch_{un}profiled)
54                               // VG_(run_a_noredir_translation__return_point)
55
56#include "pub_core_threadstate.h"  // VexGuestArchState
57#include "pub_core_trampoline.h"   // VG_(ppctoc_magic_redirect_return_stub)
58
59#include "pub_core_execontext.h"  // VG_(make_depth_1_ExeContext_from_Addr)
60
61
62/*------------------------------------------------------------*/
63/*--- Stats                                                ---*/
64/*------------------------------------------------------------*/
65
/* Counters recording how vg_SP_update_pass classified each Put to SP.
   They are only ever incremented there, and are reported by
   VG_(print_translation_stats). */
// Put handled by a size-specific track_{new,die}_mem_stack_<N> call.
static UInt n_SP_updates_fast            = 0;
// Delta from the old SP was a known constant, but not one of the
// specially-handled sizes, so the generic helper was used.
static UInt n_SP_updates_generic_known   = 0;
// Delta could not be determined (or the Put only partly covered SP).
static UInt n_SP_updates_generic_unknown = 0;
69
70void VG_(print_translation_stats) ( void )
71{
72   Char buf[6];
73   UInt n_SP_updates = n_SP_updates_fast + n_SP_updates_generic_known
74                                         + n_SP_updates_generic_unknown;
75   VG_(percentify)(n_SP_updates_fast, n_SP_updates, 1, 6, buf);
76   VG_(message)(Vg_DebugMsg,
77      "translate:            fast SP updates identified: %'u (%s)\n",
78      n_SP_updates_fast, buf );
79
80   VG_(percentify)(n_SP_updates_generic_known, n_SP_updates, 1, 6, buf);
81   VG_(message)(Vg_DebugMsg,
82      "translate:   generic_known SP updates identified: %'u (%s)\n",
83      n_SP_updates_generic_known, buf );
84
85   VG_(percentify)(n_SP_updates_generic_unknown, n_SP_updates, 1, 6, buf);
86   VG_(message)(Vg_DebugMsg,
87      "translate: generic_unknown SP updates identified: %'u (%s)\n",
88      n_SP_updates_generic_unknown, buf );
89}
90
91/*------------------------------------------------------------*/
92/*--- %SP-update pass                                      ---*/
93/*------------------------------------------------------------*/
94
95static Bool need_to_handle_SP_assignment(void)
96{
97   return ( VG_(tdict).track_new_mem_stack_4   ||
98            VG_(tdict).track_die_mem_stack_4   ||
99            VG_(tdict).track_new_mem_stack_8   ||
100            VG_(tdict).track_die_mem_stack_8   ||
101            VG_(tdict).track_new_mem_stack_12  ||
102            VG_(tdict).track_die_mem_stack_12  ||
103            VG_(tdict).track_new_mem_stack_16  ||
104            VG_(tdict).track_die_mem_stack_16  ||
105            VG_(tdict).track_new_mem_stack_32  ||
106            VG_(tdict).track_die_mem_stack_32  ||
107            VG_(tdict).track_new_mem_stack_112 ||
108            VG_(tdict).track_die_mem_stack_112 ||
109            VG_(tdict).track_new_mem_stack_128 ||
110            VG_(tdict).track_die_mem_stack_128 ||
111            VG_(tdict).track_new_mem_stack_144 ||
112            VG_(tdict).track_die_mem_stack_144 ||
113            VG_(tdict).track_new_mem_stack_160 ||
114            VG_(tdict).track_die_mem_stack_160 ||
115            VG_(tdict).track_new_mem_stack     ||
116            VG_(tdict).track_die_mem_stack     );
117}
118
119// - The SP aliases are held in an array which is used as a circular buffer.
120//   This misses very few constant updates of SP (ie. < 0.1%) while using a
121//   small, constant structure that will also never fill up and cause
122//   execution to abort.
123// - Unused slots have a .temp value of 'IRTemp_INVALID'.
124// - 'next_SP_alias_slot' is the index where the next alias will be stored.
// - If the buffer fills, we circle around and start over-writing
//   non-IRTemp_INVALID values.  This is rare, and the overwriting of a
//   value that would have subsequently been used is even rarer.
128// - Every slot below next_SP_alias_slot holds a non-IRTemp_INVALID value.
129//   The rest either all won't (if we haven't yet circled around) or all
130//   will (if we have circled around).
131
/* One entry in the SP alias table: records that IR temporary 'temp'
   holds a value equal to the stack pointer plus the constant 'delta'. */
typedef
   struct {
      IRTemp temp;    // the aliasing tmp; IRTemp_INVALID marks an unused slot
      Long   delta;   // signed offset of 'temp' relative to SP
   }
   SP_Alias;

// With 32 slots the buffer fills very rarely -- eg. once in a run of GCC.
// And I've tested with smaller values and the wrap-around case works ok.
#define N_ALIASES    32
// The alias table itself, used as a circular buffer.
static SP_Alias SP_aliases[N_ALIASES];
// Index of the slot that the next add_SP_alias call will write.
static Int      next_SP_alias_slot = 0;
144
145static void clear_SP_aliases(void)
146{
147   Int i;
148   for (i = 0; i < N_ALIASES; i++) {
149      SP_aliases[i].temp  = IRTemp_INVALID;
150      SP_aliases[i].delta = 0;
151   }
152   next_SP_alias_slot = 0;
153}
154
155static void add_SP_alias(IRTemp temp, Long delta)
156{
157   vg_assert(temp != IRTemp_INVALID);
158   SP_aliases[ next_SP_alias_slot ].temp  = temp;
159   SP_aliases[ next_SP_alias_slot ].delta = delta;
160   next_SP_alias_slot++;
161   if (N_ALIASES == next_SP_alias_slot) next_SP_alias_slot = 0;
162}
163
164static Bool get_SP_delta(IRTemp temp, ULong* delta)
165{
166   Int i;      // i must be signed!
167   vg_assert(IRTemp_INVALID != temp);
168   // Search backwards between current buffer position and the start.
169   for (i = next_SP_alias_slot-1; i >= 0; i--) {
170      if (temp == SP_aliases[i].temp) {
171         *delta = SP_aliases[i].delta;
172         return True;
173      }
174   }
175   // Search backwards between the end and the current buffer position.
176   for (i = N_ALIASES-1; i >= next_SP_alias_slot; i--) {
177      if (temp == SP_aliases[i].temp) {
178         *delta = SP_aliases[i].delta;
179         return True;
180      }
181   }
182   return False;
183}
184
185static void update_SP_aliases(Long delta)
186{
187   Int i;
188   for (i = 0; i < N_ALIASES; i++) {
189      if (SP_aliases[i].temp == IRTemp_INVALID) {
190         return;
191      }
192      SP_aliases[i].delta += delta;
193   }
194}
195
196/* Given a guest IP, get an origin tag for a 1-element stack trace,
197   and wrap it up in an IR atom that can be passed as the origin-tag
198   value for a stack-adjustment helper function. */
199static IRExpr* mk_ecu_Expr ( Addr64 guest_IP )
200{
201   UInt ecu;
202   ExeContext* ec
203      = VG_(make_depth_1_ExeContext_from_Addr)( (Addr)guest_IP );
204   vg_assert(ec);
205   ecu = VG_(get_ECU_from_ExeContext)( ec );
206   vg_assert(VG_(is_plausible_ECU)(ecu));
207   /* This is always safe to do, since ecu is only 32 bits, and
208      HWord is 32 or 64. */
209   return mkIRExpr_HWord( (HWord)ecu );
210}
211
212
213/* For tools that want to know about SP changes, this pass adds
214   in the appropriate hooks.  We have to do it after the tool's
215   instrumentation, so the tool doesn't have to worry about the C calls
216   it adds in, and we must do it before register allocation because
217   spilled temps make it much harder to work out the SP deltas.
218   This it is done with Vex's "second instrumentation" pass.
219
220   Basically, we look for GET(SP)/PUT(SP) pairs and track constant
221   increments/decrements of SP between them.  (This requires tracking one or
222   more "aliases", which are not exact aliases but instead are tempregs
223   whose value is equal to the SP's plus or minus a known constant.)
224   If all the changes to SP leading up to a PUT(SP) are by known, small
225   constants, we can do a specific call to eg. new_mem_stack_4, otherwise
226   we fall back to the case that handles an unknown SP change.
227
228   There is some extra complexity to deal correctly with updates to
229   only parts of SP.  Bizarre, but it has been known to happen.
230*/
231static
232IRSB* vg_SP_update_pass ( void*             closureV,
233                          IRSB*             sb_in,
234                          VexGuestLayout*   layout,
235                          VexGuestExtents*  vge,
236                          IRType            gWordTy,
237                          IRType            hWordTy )
238{
239   Int         i, j, minoff_ST, maxoff_ST, sizeof_SP, offset_SP;
240   Int         first_SP, last_SP, first_Put, last_Put;
241   IRDirty     *dcall, *d;
242   IRStmt*     st;
243   IRExpr*     e;
244   IRRegArray* descr;
245   IRType      typeof_SP;
246   Long        delta, con;
247
248   /* Set up stuff for tracking the guest IP */
249   Bool   curr_IP_known = False;
250   Addr64 curr_IP       = 0;
251
252   /* Set up BB */
253   IRSB* bb     = emptyIRSB();
254   bb->tyenv    = deepCopyIRTypeEnv(sb_in->tyenv);
255   bb->next     = deepCopyIRExpr(sb_in->next);
256   bb->jumpkind = sb_in->jumpkind;
257
258   delta = 0;
259
260   sizeof_SP = layout->sizeof_SP;
261   offset_SP = layout->offset_SP;
262   typeof_SP = sizeof_SP==4 ? Ity_I32 : Ity_I64;
263   vg_assert(sizeof_SP == 4 || sizeof_SP == 8);
264
265   /* --- Start of #defines --- */
266
267#  define IS_ADD(op) (sizeof_SP==4 ? ((op)==Iop_Add32) : ((op)==Iop_Add64))
268#  define IS_SUB(op) (sizeof_SP==4 ? ((op)==Iop_Sub32) : ((op)==Iop_Sub64))
269
270#  define IS_ADD_OR_SUB(op) (IS_ADD(op) || IS_SUB(op))
271
272#  define GET_CONST(con)                                                \
273       (sizeof_SP==4 ? (Long)(Int)(con->Ico.U32)                        \
274                     : (Long)(con->Ico.U64))
275
276#  define DO_NEW(syze, tmpp)                                            \
277      do {                                                              \
278         Bool vanilla, w_ecu;                                           \
279         vg_assert(curr_IP_known);                                      \
280         vanilla = NULL != VG_(tdict).track_new_mem_stack_##syze;       \
281         w_ecu   = NULL != VG_(tdict).track_new_mem_stack_##syze##_w_ECU; \
282         vg_assert(!(vanilla && w_ecu)); /* can't have both */          \
283         if (!(vanilla || w_ecu))                                       \
284            goto generic;                                               \
285                                                                        \
286         /* I don't know if it's really necessary to say that the */    \
287         /* call reads the stack pointer.  But anyway, we do. */        \
288         if (w_ecu) {                                                   \
289            dcall = unsafeIRDirty_0_N(                                  \
290                       2/*regparms*/,                                   \
291                       "track_new_mem_stack_" #syze "_w_ECU",           \
292                       VG_(fnptr_to_fnentry)(                           \
293                          VG_(tdict).track_new_mem_stack_##syze##_w_ECU ), \
294                       mkIRExprVec_2(IRExpr_RdTmp(tmpp),                \
295                                     mk_ecu_Expr(curr_IP))              \
296                    );                                                  \
297         } else {                                                       \
298            dcall = unsafeIRDirty_0_N(                                  \
299                       1/*regparms*/,                                   \
300                       "track_new_mem_stack_" #syze ,                   \
301                       VG_(fnptr_to_fnentry)(                           \
302                          VG_(tdict).track_new_mem_stack_##syze ),      \
303                       mkIRExprVec_1(IRExpr_RdTmp(tmpp))                \
304                    );                                                  \
305         }                                                              \
306         dcall->nFxState = 1;                                           \
307         dcall->fxState[0].fx     = Ifx_Read;                           \
308         dcall->fxState[0].offset = layout->offset_SP;                  \
309         dcall->fxState[0].size   = layout->sizeof_SP;                  \
310                                                                        \
311         addStmtToIRSB( bb, IRStmt_Dirty(dcall) );                      \
312                                                                        \
313         tl_assert(syze > 0);                                           \
314         update_SP_aliases(syze);                                       \
315                                                                        \
316         n_SP_updates_fast++;                                           \
317                                                                        \
318      } while (0)
319
320#  define DO_DIE(syze, tmpp)                                            \
321      do {                                                              \
322         if (!VG_(tdict).track_die_mem_stack_##syze)                    \
323            goto generic;                                               \
324                                                                        \
325         /* I don't know if it's really necessary to say that the */    \
326         /* call reads the stack pointer.  But anyway, we do. */        \
327         dcall = unsafeIRDirty_0_N(                                     \
328                    1/*regparms*/,                                      \
329                    "track_die_mem_stack_" #syze,                       \
330                    VG_(fnptr_to_fnentry)(                              \
331                       VG_(tdict).track_die_mem_stack_##syze ),         \
332                    mkIRExprVec_1(IRExpr_RdTmp(tmpp))                   \
333                 );                                                     \
334         dcall->nFxState = 1;                                           \
335         dcall->fxState[0].fx     = Ifx_Read;                           \
336         dcall->fxState[0].offset = layout->offset_SP;                  \
337         dcall->fxState[0].size   = layout->sizeof_SP;                  \
338                                                                        \
339         addStmtToIRSB( bb, IRStmt_Dirty(dcall) );                      \
340                                                                        \
341         tl_assert(syze > 0);                                           \
342         update_SP_aliases(-(syze));                                    \
343                                                                        \
344         n_SP_updates_fast++;                                           \
345                                                                        \
346      } while (0)
347
348   /* --- End of #defines --- */
349
350   clear_SP_aliases();
351
352   for (i = 0; i <  sb_in->stmts_used; i++) {
353
354      st = sb_in->stmts[i];
355
356      if (st->tag == Ist_IMark) {
357         curr_IP_known = True;
358         curr_IP       = st->Ist.IMark.addr;
359      }
360
361      /* t = Get(sp):   curr = t, delta = 0 */
362      if (st->tag != Ist_WrTmp) goto case2;
363      e = st->Ist.WrTmp.data;
364      if (e->tag != Iex_Get)              goto case2;
365      if (e->Iex.Get.offset != offset_SP) goto case2;
366      if (e->Iex.Get.ty != typeof_SP)     goto case2;
367      vg_assert( typeOfIRTemp(bb->tyenv, st->Ist.WrTmp.tmp) == typeof_SP );
368      add_SP_alias(st->Ist.WrTmp.tmp, 0);
369      addStmtToIRSB( bb, st );
370      continue;
371
372     case2:
373      /* t' = curr +/- const:   curr = t',  delta +=/-= const */
374      if (st->tag != Ist_WrTmp) goto case3;
375      e = st->Ist.WrTmp.data;
376      if (e->tag != Iex_Binop) goto case3;
377      if (e->Iex.Binop.arg1->tag != Iex_RdTmp) goto case3;
378      if (!get_SP_delta(e->Iex.Binop.arg1->Iex.RdTmp.tmp, &delta)) goto case3;
379      if (e->Iex.Binop.arg2->tag != Iex_Const) goto case3;
380      if (!IS_ADD_OR_SUB(e->Iex.Binop.op)) goto case3;
381      con = GET_CONST(e->Iex.Binop.arg2->Iex.Const.con);
382      vg_assert( typeOfIRTemp(bb->tyenv, st->Ist.WrTmp.tmp) == typeof_SP );
383      if (IS_ADD(e->Iex.Binop.op)) {
384         add_SP_alias(st->Ist.WrTmp.tmp, delta + con);
385      } else {
386         add_SP_alias(st->Ist.WrTmp.tmp, delta - con);
387      }
388      addStmtToIRSB( bb, st );
389      continue;
390
391     case3:
392      /* t' = curr:   curr = t' */
393      if (st->tag != Ist_WrTmp) goto case4;
394      e = st->Ist.WrTmp.data;
395      if (e->tag != Iex_RdTmp) goto case4;
396      if (!get_SP_delta(e->Iex.RdTmp.tmp, &delta)) goto case4;
397      vg_assert( typeOfIRTemp(bb->tyenv, st->Ist.WrTmp.tmp) == typeof_SP );
398      add_SP_alias(st->Ist.WrTmp.tmp, delta);
399      addStmtToIRSB( bb, st );
400      continue;
401
402     case4:
403      /* Put(sp) = curr */
404      /* More generally, we must correctly handle a Put which writes
405         any part of SP, not just the case where all of SP is
406         written. */
407      if (st->tag != Ist_Put) goto case5;
408      first_SP  = offset_SP;
409      last_SP   = first_SP + sizeof_SP - 1;
410      first_Put = st->Ist.Put.offset;
411      last_Put  = first_Put
412                  + sizeofIRType( typeOfIRExpr( bb->tyenv, st->Ist.Put.data ))
413                  - 1;
414      vg_assert(first_SP <= last_SP);
415      vg_assert(first_Put <= last_Put);
416
417      if (last_Put < first_SP || last_SP < first_Put)
418         goto case5; /* no overlap */
419
420      if (st->Ist.Put.data->tag == Iex_RdTmp
421          && get_SP_delta(st->Ist.Put.data->Iex.RdTmp.tmp, &delta)) {
422         IRTemp tttmp = st->Ist.Put.data->Iex.RdTmp.tmp;
423         /* Why should the following assertion hold?  Because any
424            alias added by put_SP_alias must be of a temporary which
425            has the same type as typeof_SP, and whose value is a Get
426            at exactly offset_SP of size typeof_SP.  Each call to
427            put_SP_alias is immediately preceded by an assertion that
428            we are putting in a binding for a correctly-typed
429            temporary. */
430         vg_assert( typeOfIRTemp(bb->tyenv, tttmp) == typeof_SP );
431         /* From the same type-and-offset-correctness argument, if
432            we found a useable alias, it must for an "exact" write of SP. */
433         vg_assert(first_SP == first_Put);
434         vg_assert(last_SP == last_Put);
435         switch (delta) {
436            case    0:                      addStmtToIRSB(bb,st); continue;
437            case    4: DO_DIE(  4,  tttmp); addStmtToIRSB(bb,st); continue;
438            case   -4: DO_NEW(  4,  tttmp); addStmtToIRSB(bb,st); continue;
439            case    8: DO_DIE(  8,  tttmp); addStmtToIRSB(bb,st); continue;
440            case   -8: DO_NEW(  8,  tttmp); addStmtToIRSB(bb,st); continue;
441            case   12: DO_DIE(  12, tttmp); addStmtToIRSB(bb,st); continue;
442            case  -12: DO_NEW(  12, tttmp); addStmtToIRSB(bb,st); continue;
443            case   16: DO_DIE(  16, tttmp); addStmtToIRSB(bb,st); continue;
444            case  -16: DO_NEW(  16, tttmp); addStmtToIRSB(bb,st); continue;
445            case   32: DO_DIE(  32, tttmp); addStmtToIRSB(bb,st); continue;
446            case  -32: DO_NEW(  32, tttmp); addStmtToIRSB(bb,st); continue;
447            case  112: DO_DIE( 112, tttmp); addStmtToIRSB(bb,st); continue;
448            case -112: DO_NEW( 112, tttmp); addStmtToIRSB(bb,st); continue;
449            case  128: DO_DIE( 128, tttmp); addStmtToIRSB(bb,st); continue;
450            case -128: DO_NEW( 128, tttmp); addStmtToIRSB(bb,st); continue;
451            case  144: DO_DIE( 144, tttmp); addStmtToIRSB(bb,st); continue;
452            case -144: DO_NEW( 144, tttmp); addStmtToIRSB(bb,st); continue;
453            case  160: DO_DIE( 160, tttmp); addStmtToIRSB(bb,st); continue;
454            case -160: DO_NEW( 160, tttmp); addStmtToIRSB(bb,st); continue;
455            default:
456               /* common values for ppc64: 144 128 160 112 176 */
457               n_SP_updates_generic_known++;
458               goto generic;
459         }
460      } else {
461         /* Deal with an unknown update to SP.  We're here because
462            either:
463            (1) the Put does not exactly cover SP; it is a partial update.
464                Highly unlikely, but has been known to happen for 16-bit
465                Windows apps running on Wine, doing 16-bit adjustments to
466                %sp.
467            (2) the Put does exactly cover SP, but we are unable to
468                determine how the value relates to the old SP.  In any
469                case, we cannot assume that the Put.data value is a tmp;
470                we must assume it can be anything allowed in flat IR (tmp
471                or const).
472         */
473         IRTemp  old_SP;
474         n_SP_updates_generic_unknown++;
475
476         // Nb: if all is well, this generic case will typically be
477         // called something like every 1000th SP update.  If it's more than
478         // that, the above code may be missing some cases.
479        generic:
480         /* Pass both the old and new SP values to this helper.  Also,
481            pass an origin tag, even if it isn't needed. */
482         old_SP = newIRTemp(bb->tyenv, typeof_SP);
483         addStmtToIRSB(
484            bb,
485            IRStmt_WrTmp( old_SP, IRExpr_Get(offset_SP, typeof_SP) )
486         );
487
488         /* Now we know what the old value of SP is.  But knowing the new
489            value is a bit tricky if there is a partial write. */
490         if (first_Put == first_SP && last_Put == last_SP) {
491           /* The common case, an exact write to SP.  So st->Ist.Put.data
492              does hold the new value; simple. */
493            vg_assert(curr_IP_known);
494            dcall = unsafeIRDirty_0_N(
495                       3/*regparms*/,
496                       "VG_(unknown_SP_update)",
497                       VG_(fnptr_to_fnentry)( &VG_(unknown_SP_update) ),
498                       mkIRExprVec_3( IRExpr_RdTmp(old_SP), st->Ist.Put.data,
499                                      mk_ecu_Expr(curr_IP) )
500                    );
501            addStmtToIRSB( bb, IRStmt_Dirty(dcall) );
502            /* don't forget the original assignment */
503            addStmtToIRSB( bb, st );
504         } else {
505            /* We have a partial update to SP.  We need to know what
506               the new SP will be, and hand that to the helper call,
507               but when the helper call happens, SP must hold the
508               value it had before the update.  Tricky.
509               Therefore use the following kludge:
510               1. do the partial SP update (Put)
511               2. Get the new SP value into a tmp, new_SP
512               3. Put old_SP
513               4. Call the helper
514               5. Put new_SP
515            */
516            IRTemp new_SP;
517            /* 1 */
518            addStmtToIRSB( bb, st );
519            /* 2 */
520            new_SP = newIRTemp(bb->tyenv, typeof_SP);
521            addStmtToIRSB(
522               bb,
523               IRStmt_WrTmp( new_SP, IRExpr_Get(offset_SP, typeof_SP) )
524            );
525            /* 3 */
526            addStmtToIRSB( bb, IRStmt_Put(offset_SP, IRExpr_RdTmp(old_SP) ));
527            /* 4 */
528            vg_assert(curr_IP_known);
529            dcall = unsafeIRDirty_0_N(
530                       3/*regparms*/,
531                       "VG_(unknown_SP_update)",
532                       VG_(fnptr_to_fnentry)( &VG_(unknown_SP_update) ),
533                       mkIRExprVec_3( IRExpr_RdTmp(old_SP),
534                                      IRExpr_RdTmp(new_SP),
535                                      mk_ecu_Expr(curr_IP) )
536                    );
537            addStmtToIRSB( bb, IRStmt_Dirty(dcall) );
538            /* 5 */
539            addStmtToIRSB( bb, IRStmt_Put(offset_SP, IRExpr_RdTmp(new_SP) ));
540         }
541
542         /* Forget what we already know. */
543         clear_SP_aliases();
544
545         /* If this is a Put of a tmp that exactly updates SP,
546            start tracking aliases against this tmp. */
547
548         if (first_Put == first_SP && last_Put == last_SP
549             && st->Ist.Put.data->tag == Iex_RdTmp) {
550            vg_assert( typeOfIRTemp(bb->tyenv, st->Ist.Put.data->Iex.RdTmp.tmp)
551                       == typeof_SP );
552            add_SP_alias(st->Ist.Put.data->Iex.RdTmp.tmp, 0);
553         }
554         continue;
555      }
556
557     case5:
558      /* PutI or Dirty call which overlaps SP: complain.  We can't
559         deal with SP changing in weird ways (well, we can, but not at
560         this time of night).  */
561      if (st->tag == Ist_PutI) {
562         descr = st->Ist.PutI.descr;
563         minoff_ST = descr->base;
564         maxoff_ST = descr->base
565                     + descr->nElems * sizeofIRType(descr->elemTy) - 1;
566         if (!(offset_SP > maxoff_ST
567               || (offset_SP + sizeof_SP - 1) < minoff_ST))
568            goto complain;
569      }
570      if (st->tag == Ist_Dirty) {
571         d = st->Ist.Dirty.details;
572         for (j = 0; j < d->nFxState; j++) {
573            minoff_ST = d->fxState[j].offset;
574            maxoff_ST = d->fxState[j].offset + d->fxState[j].size - 1;
575            if (d->fxState[j].fx == Ifx_Read || d->fxState[j].fx == Ifx_None)
576               continue;
577            if (!(offset_SP > maxoff_ST
578                  || (offset_SP + sizeof_SP - 1) < minoff_ST))
579               goto complain;
580         }
581      }
582
583      /* well, not interesting.  Just copy and keep going. */
584      addStmtToIRSB( bb, st );
585
586   } /* for (i = 0; i < sb_in->stmts_used; i++) */
587
588   return bb;
589
590  complain:
591   VG_(core_panic)("vg_SP_update_pass: PutI or Dirty which overlaps SP");
592
593#undef IS_ADD
594#undef IS_SUB
595#undef IS_ADD_OR_SUB
596#undef GET_CONST
597#undef DO_NEW
598#undef DO_DIE
599}
600
601/*------------------------------------------------------------*/
602/*--- Main entry point for the JITter.                     ---*/
603/*------------------------------------------------------------*/
604
605/* Extra comments re self-checking translations and self-modifying
606   code.  (JRS 14 Oct 05).
607
608   There are 3 modes:
609   (1) no checking: all code assumed to be not self-modifying
610   (2) partial: known-problematic situations get a self-check
611   (3) full checking: all translations get a self-check
612
613   As currently implemented, the default is (2).  (3) is always safe,
614   but very slow.  (1) works mostly, but fails for gcc nested-function
615   code which uses trampolines on the stack; this situation is
616   detected and handled by (2).
617
618   ----------
619
620   A more robust and transparent solution, which is not currently
621   implemented, is a variant of (2): if a translation is made from an
622   area which aspacem says does not have 'w' permission, then it can
623   be non-self-checking.  Otherwise, it needs a self-check.
624
625   This is complicated by Vex's basic-block chasing.  If a self-check
626   is requested, then Vex will not chase over basic block boundaries
627   (it's too complex).  However there is still a problem if it chases
628   from a non-'w' area into a 'w' area.
629
630   I think the right thing to do is:
631
632   - if a translation request starts in a 'w' area, ask for a
633     self-checking translation, and do not allow any chasing (make
634     chase_into_ok return False).  Note that the latter is redundant
635     in the sense that Vex won't chase anyway in this situation.
636
637   - if a translation request starts in a non-'w' area, do not ask for
638     a self-checking translation.  However, do not allow chasing (as
639     determined by chase_into_ok) to go into a 'w' area.
640
641   The result of this is that all code inside 'w' areas is self
642   checking.
643
644   To complete the trick, there is a caveat: we must watch the
645   client's mprotect calls.  If pages are changed from non-'w' to 'w'
646   then we should throw away all translations which intersect the
647   affected area, so as to force them to be redone with self-checks.
648
649   ----------
650
651   The above outlines the conditions under which bb chasing is allowed
652   from a self-modifying-code point of view.  There are other
653   situations pertaining to function redirection in which it is
654   necessary to disallow chasing, but those fall outside the scope of
655   this comment.
656*/
657
658
/* Vex dumps the final code in here.  Then we can copy it off
   wherever we like. */
/* 60000: should agree with assertion in VG_(add_to_transtab) in
   m_transtab.c. */
#define N_TMPBUF 60000
/* Single static scratch buffer of N_TMPBUF bytes for generated code. */
static UChar tmpbuf[N_TMPBUF];
665
666
667/* Function pointers we must supply to LibVEX in order that it
668   can bomb out and emit messages under Valgrind's control. */
669__attribute__ ((noreturn))
670static
671void failure_exit ( void )
672{
673   LibVEX_ShowAllocStats();
674   VG_(core_panic)("LibVEX called failure_exit().");
675}
676
677static
678void log_bytes ( HChar* bytes, Int nbytes )
679{
680  Int i;
681  for (i = 0; i < nbytes-3; i += 4)
682     VG_(printf)("%c%c%c%c", bytes[i], bytes[i+1], bytes[i+2], bytes[i+3]);
683  for (; i < nbytes; i++)
684     VG_(printf)("%c", bytes[i]);
685}
686
687
688/* --------- Various helper functions for translation --------- */
689
690/* Look for reasons to disallow making translations from the given
691   segment. */
692
693static Bool translations_allowable_from_seg ( NSegment const* seg )
694{
695#  if defined(VGA_x86)
696   Bool allowR = True;
697#  else
698   Bool allowR = False;
699#  endif
700   return seg != NULL
701          && (seg->kind == SkAnonC || seg->kind == SkFileC || seg->kind == SkShmC)
702          && (seg->hasX || (seg->hasR && allowR));
703}
704
705
706/* Is a self-check required for a translation of a guest address
707   inside segment SEG when requested by thread TID ? */
708
709static Bool self_check_required ( NSegment const* seg, ThreadId tid )
710{
711#if defined(VGO_darwin)
712   // GrP fixme hack - dyld i386 IMPORT gets rewritten
713   // to really do this correctly, we'd need to flush the
714   // translation cache whenever a segment became +WX
715   if (seg->hasX  && seg->hasW) {
716      return True;
717   }
718#endif
719   switch (VG_(clo_smc_check)) {
720      case Vg_SmcNone:  return False;
721      case Vg_SmcAll:   return True;
722      case Vg_SmcStack:
723         return seg
724                ? (seg->start <= VG_(get_SP)(tid)
725                   && VG_(get_SP)(tid)+sizeof(Word)-1 <= seg->end)
726                : False;
727         break;
728      default:
729         vg_assert2(0, "unknown VG_(clo_smc_check) value");
730   }
731}
732
733
734/* This is a callback passed to LibVEX_Translate.  It stops Vex from
735   chasing into function entry points that we wish to redirect.
736   Chasing across them obviously defeats the redirect mechanism, with
737   bad effects for Memcheck, Addrcheck, and possibly others.
738
739   Also, we must stop Vex chasing into blocks for which we might want
740   to self checking.
741*/
742static Bool chase_into_ok ( void* closureV, Addr64 addr64 )
743{
744   Addr               addr    = (Addr)addr64;
745   NSegment const*    seg     = VG_(am_find_nsegment)(addr);
746   VgCallbackClosure* closure = (VgCallbackClosure*)closureV;
747
748   /* Work through a list of possibilities why we might not want to
749      allow a chase. */
750
751   /* Destination not in a plausible segment? */
752   if (!translations_allowable_from_seg(seg))
753      goto dontchase;
754
755   /* Destination requires a self-check? */
756   if (self_check_required(seg, closure->tid))
757      goto dontchase;
758
759   /* Destination is redirected? */
760   if (addr != VG_(redir_do_lookup)(addr, NULL))
761      goto dontchase;
762
763#  if defined(VG_PLAT_USES_PPCTOC)
764   /* This needs to be at the start of its own block.  Don't chase. Re
765      ULong_to_Ptr, be careful to ensure we only compare 32 bits on a
766      32-bit target.*/
767   if (ULong_to_Ptr(addr64)
768       == (void*)&VG_(ppctoc_magic_redirect_return_stub))
769      goto dontchase;
770#  endif
771
772   /* overly conservative, but .. don't chase into the distinguished
773      address that m_transtab uses as an empty-slot marker for
774      VG_(tt_fast). */
775   if (addr == TRANSTAB_BOGUS_GUEST_ADDR)
776      goto dontchase;
777
778   /* well, ok then.  go on and chase. */
779   return True;
780
781   vg_assert(0);
782   /*NOTREACHED*/
783
784  dontchase:
785   if (0) VG_(printf)("not chasing into 0x%lx\n", addr);
786   return False;
787}
788
789
790/* --------------- helpers for with-TOC platforms --------------- */
791
792/* NOTE: with-TOC platforms are: ppc64-linux, ppc32-aix5, ppc64-aix5. */
793
794static IRExpr* mkU64 ( ULong n ) {
795   return IRExpr_Const(IRConst_U64(n));
796}
797static IRExpr* mkU32 ( UInt n ) {
798   return IRExpr_Const(IRConst_U32(n));
799}
800
801#if defined(VG_PLAT_USES_PPCTOC)
802static IRExpr* mkU8 ( UChar n ) {
803   return IRExpr_Const(IRConst_U8(n));
804}
805static IRExpr* narrowTo32 ( IRTypeEnv* tyenv, IRExpr* e ) {
806   if (typeOfIRExpr(tyenv, e) == Ity_I32) {
807      return e;
808   } else {
809      vg_assert(typeOfIRExpr(tyenv, e) == Ity_I64);
810      return IRExpr_Unop(Iop_64to32, e);
811   }
812}
813
814/* Generate code to push word-typed expression 'e' onto this thread's
815   redir stack, checking for stack overflow and generating code to
816   bomb out if so. */
817
818static void gen_PUSH ( IRSB* bb, IRExpr* e )
819{
820   IRRegArray* descr;
821   IRTemp      t1;
822   IRExpr*     one;
823
824#  if defined(VGP_ppc64_linux) || defined(VGP_ppc64_aix5)
825   Int    stack_size       = VEX_GUEST_PPC64_REDIR_STACK_SIZE;
826   Int    offB_REDIR_SP    = offsetof(VexGuestPPC64State,guest_REDIR_SP);
827   Int    offB_REDIR_STACK = offsetof(VexGuestPPC64State,guest_REDIR_STACK);
828   Int    offB_EMWARN      = offsetof(VexGuestPPC64State,guest_EMWARN);
829   Bool   is64             = True;
830   IRType ty_Word          = Ity_I64;
831   IROp   op_CmpNE         = Iop_CmpNE64;
832   IROp   op_Sar           = Iop_Sar64;
833   IROp   op_Sub           = Iop_Sub64;
834   IROp   op_Add           = Iop_Add64;
835   IRExpr*(*mkU)(ULong)    = mkU64;
836   vg_assert(VG_WORDSIZE == 8);
837#  else
838   Int    stack_size       = VEX_GUEST_PPC32_REDIR_STACK_SIZE;
839   Int    offB_REDIR_SP    = offsetof(VexGuestPPC32State,guest_REDIR_SP);
840   Int    offB_REDIR_STACK = offsetof(VexGuestPPC32State,guest_REDIR_STACK);
841   Int    offB_EMWARN      = offsetof(VexGuestPPC32State,guest_EMWARN);
842   Bool   is64             = False;
843   IRType ty_Word          = Ity_I32;
844   IROp   op_CmpNE         = Iop_CmpNE32;
845   IROp   op_Sar           = Iop_Sar32;
846   IROp   op_Sub           = Iop_Sub32;
847   IROp   op_Add           = Iop_Add32;
848   IRExpr*(*mkU)(UInt)     = mkU32;
849   vg_assert(VG_WORDSIZE == 4);
850#  endif
851
852   vg_assert(sizeof(void*) == VG_WORDSIZE);
853   vg_assert(sizeof(Word)  == VG_WORDSIZE);
854   vg_assert(sizeof(Addr)  == VG_WORDSIZE);
855
856   descr = mkIRRegArray( offB_REDIR_STACK, ty_Word, stack_size );
857   t1    = newIRTemp( bb->tyenv, ty_Word );
858   one   = mkU(1);
859
860   vg_assert(typeOfIRExpr(bb->tyenv, e) == ty_Word);
861
862   /* t1 = guest_REDIR_SP + 1 */
863   addStmtToIRSB(
864      bb,
865      IRStmt_WrTmp(
866         t1,
867         IRExpr_Binop(op_Add, IRExpr_Get( offB_REDIR_SP, ty_Word ), one)
868      )
869   );
870
871   /* Bomb out if t1 >=s stack_size, that is, (stack_size-1)-t1 <s 0.
872      The destination (0) is a bit bogus but it doesn't matter since
873      this is an unrecoverable error and will lead to Valgrind
874      shutting down.  _EMWARN is set regardless - that's harmless
875      since is only has a meaning if the exit is taken. */
876   addStmtToIRSB(
877      bb,
878      IRStmt_Put(offB_EMWARN, mkU32(EmWarn_PPC64_redir_overflow))
879   );
880   addStmtToIRSB(
881      bb,
882      IRStmt_Exit(
883         IRExpr_Binop(
884            op_CmpNE,
885            IRExpr_Binop(
886               op_Sar,
887               IRExpr_Binop(op_Sub,mkU(stack_size-1),IRExpr_RdTmp(t1)),
888               mkU8(8 * VG_WORDSIZE - 1)
889            ),
890            mkU(0)
891         ),
892         Ijk_EmFail,
893         is64 ? IRConst_U64(0) : IRConst_U32(0)
894      )
895   );
896
897   /* guest_REDIR_SP = t1 */
898   addStmtToIRSB(bb, IRStmt_Put(offB_REDIR_SP, IRExpr_RdTmp(t1)));
899
900   /* guest_REDIR_STACK[t1+0] = e */
901   /* PutI/GetI have I32-typed indexes regardless of guest word size */
902   addStmtToIRSB(
903      bb,
904      IRStmt_PutI(descr, narrowTo32(bb->tyenv,IRExpr_RdTmp(t1)), 0, e)
905   );
906}
907
908
909/* Generate code to pop a word-sized value from this thread's redir
910   stack, binding it to a new temporary, which is returned.  As with
911   gen_PUSH, an overflow check is also performed. */
912
913static IRTemp gen_POP ( IRSB* bb )
914{
915#  if defined(VGP_ppc64_linux) || defined(VGP_ppc64_aix5)
916   Int    stack_size       = VEX_GUEST_PPC64_REDIR_STACK_SIZE;
917   Int    offB_REDIR_SP    = offsetof(VexGuestPPC64State,guest_REDIR_SP);
918   Int    offB_REDIR_STACK = offsetof(VexGuestPPC64State,guest_REDIR_STACK);
919   Int    offB_EMWARN      = offsetof(VexGuestPPC64State,guest_EMWARN);
920   Bool   is64             = True;
921   IRType ty_Word          = Ity_I64;
922   IROp   op_CmpNE         = Iop_CmpNE64;
923   IROp   op_Sar           = Iop_Sar64;
924   IROp   op_Sub           = Iop_Sub64;
925   IRExpr*(*mkU)(ULong)    = mkU64;
926#  else
927   Int    stack_size       = VEX_GUEST_PPC32_REDIR_STACK_SIZE;
928   Int    offB_REDIR_SP    = offsetof(VexGuestPPC32State,guest_REDIR_SP);
929   Int    offB_REDIR_STACK = offsetof(VexGuestPPC32State,guest_REDIR_STACK);
930   Int    offB_EMWARN      = offsetof(VexGuestPPC32State,guest_EMWARN);
931   Bool   is64             = False;
932   IRType ty_Word          = Ity_I32;
933   IROp   op_CmpNE         = Iop_CmpNE32;
934   IROp   op_Sar           = Iop_Sar32;
935   IROp   op_Sub           = Iop_Sub32;
936   IRExpr*(*mkU)(UInt)     = mkU32;
937#  endif
938
939   IRRegArray* descr = mkIRRegArray( offB_REDIR_STACK, ty_Word, stack_size );
940   IRTemp      t1    = newIRTemp( bb->tyenv, ty_Word );
941   IRTemp      res   = newIRTemp( bb->tyenv, ty_Word );
942   IRExpr*     one   = mkU(1);
943
944   vg_assert(sizeof(void*) == VG_WORDSIZE);
945   vg_assert(sizeof(Word)  == VG_WORDSIZE);
946   vg_assert(sizeof(Addr)  == VG_WORDSIZE);
947
948   /* t1 = guest_REDIR_SP */
949   addStmtToIRSB(
950      bb,
951      IRStmt_WrTmp( t1, IRExpr_Get( offB_REDIR_SP, ty_Word ) )
952   );
953
954   /* Bomb out if t1 < 0.  Same comments as gen_PUSH apply. */
955   addStmtToIRSB(
956      bb,
957      IRStmt_Put(offB_EMWARN, mkU32(EmWarn_PPC64_redir_underflow))
958   );
959   addStmtToIRSB(
960      bb,
961      IRStmt_Exit(
962         IRExpr_Binop(
963            op_CmpNE,
964            IRExpr_Binop(
965               op_Sar,
966               IRExpr_RdTmp(t1),
967               mkU8(8 * VG_WORDSIZE - 1)
968            ),
969            mkU(0)
970         ),
971         Ijk_EmFail,
972         is64 ? IRConst_U64(0) : IRConst_U32(0)
973      )
974   );
975
976   /* res = guest_REDIR_STACK[t1+0] */
977   /* PutI/GetI have I32-typed indexes regardless of guest word size */
978   addStmtToIRSB(
979      bb,
980      IRStmt_WrTmp(
981         res,
982         IRExpr_GetI(descr, narrowTo32(bb->tyenv,IRExpr_RdTmp(t1)), 0)
983      )
984   );
985
986   /* guest_REDIR_SP = t1-1 */
987   addStmtToIRSB(
988      bb,
989      IRStmt_Put(offB_REDIR_SP, IRExpr_Binop(op_Sub, IRExpr_RdTmp(t1), one))
990   );
991
992   return res;
993}
994
995/* Generate code to push LR and R2 onto this thread's redir stack,
996   then set R2 to the new value (which is the TOC pointer to be used
997   for the duration of the replacement function, as determined by
998   m_debuginfo), and set LR to the magic return stub, so we get to
999   intercept the return and restore R2 and L2 to the values saved
1000   here. */
1001
1002static void gen_push_and_set_LR_R2 ( IRSB* bb, Addr64 new_R2_value )
1003{
1004#  if defined(VGP_ppc64_linux) || defined(VGP_ppc64_aix5)
1005   Addr64 bogus_RA  = (Addr64)&VG_(ppctoc_magic_redirect_return_stub);
1006   Int    offB_GPR2 = offsetof(VexGuestPPC64State,guest_GPR2);
1007   Int    offB_LR   = offsetof(VexGuestPPC64State,guest_LR);
1008   gen_PUSH( bb, IRExpr_Get(offB_LR,   Ity_I64) );
1009   gen_PUSH( bb, IRExpr_Get(offB_GPR2, Ity_I64) );
1010   addStmtToIRSB( bb, IRStmt_Put( offB_LR,   mkU64( bogus_RA )) );
1011   addStmtToIRSB( bb, IRStmt_Put( offB_GPR2, mkU64( new_R2_value )) );
1012
1013#  elif defined(VGP_ppc32_aix5)
1014   Addr32 bogus_RA  = (Addr32)&VG_(ppctoc_magic_redirect_return_stub);
1015   Int    offB_GPR2 = offsetof(VexGuestPPC32State,guest_GPR2);
1016   Int    offB_LR   = offsetof(VexGuestPPC32State,guest_LR);
1017   gen_PUSH( bb, IRExpr_Get(offB_LR,   Ity_I32) );
1018   gen_PUSH( bb, IRExpr_Get(offB_GPR2, Ity_I32) );
1019   addStmtToIRSB( bb, IRStmt_Put( offB_LR,   mkU32( bogus_RA )) );
1020   addStmtToIRSB( bb, IRStmt_Put( offB_GPR2, mkU32( new_R2_value )) );
1021
1022#  else
1023#    error Platform is not TOC-afflicted, fortunately
1024#  endif
1025}
1026
1027static void gen_pop_R2_LR_then_bLR ( IRSB* bb )
1028{
1029#  if defined(VGP_ppc64_linux) || defined(VGP_ppc64_aix5)
1030   Int    offB_GPR2 = offsetof(VexGuestPPC64State,guest_GPR2);
1031   Int    offB_LR   = offsetof(VexGuestPPC64State,guest_LR);
1032   IRTemp old_R2    = newIRTemp( bb->tyenv, Ity_I64 );
1033   IRTemp old_LR    = newIRTemp( bb->tyenv, Ity_I64 );
1034   /* Restore R2 */
1035   old_R2 = gen_POP( bb );
1036   addStmtToIRSB( bb, IRStmt_Put( offB_GPR2, IRExpr_RdTmp(old_R2)) );
1037   /* Restore LR */
1038   old_LR = gen_POP( bb );
1039   addStmtToIRSB( bb, IRStmt_Put( offB_LR, IRExpr_RdTmp(old_LR)) );
1040   /* Branch to LR */
1041   /* re boring, we arrived here precisely because a wrapped fn did a
1042      blr (hence Ijk_Ret); so we should just mark this jump as Boring,
1043      else one _Call will have resulted in two _Rets. */
1044   bb->jumpkind = Ijk_Boring;
1045   bb->next = IRExpr_Binop(Iop_And64, IRExpr_RdTmp(old_LR), mkU64(~(3ULL)));
1046
1047#  elif defined(VGP_ppc32_aix5)
1048   Int    offB_GPR2 = offsetof(VexGuestPPC32State,guest_GPR2);
1049   Int    offB_LR   = offsetof(VexGuestPPC32State,guest_LR);
1050   IRTemp old_R2    = newIRTemp( bb->tyenv, Ity_I32 );
1051   IRTemp old_LR    = newIRTemp( bb->tyenv, Ity_I32 );
1052   /* Restore R2 */
1053   old_R2 = gen_POP( bb );
1054   addStmtToIRSB( bb, IRStmt_Put( offB_GPR2, IRExpr_RdTmp(old_R2)) );
1055   /* Restore LR */
1056   old_LR = gen_POP( bb );
1057   addStmtToIRSB( bb, IRStmt_Put( offB_LR, IRExpr_RdTmp(old_LR)) );
1058
1059   /* Branch to LR */
1060   /* re boring, we arrived here precisely because a wrapped fn did a
1061      blr (hence Ijk_Ret); so we should just mark this jump as Boring,
1062      else one _Call will have resulted in two _Rets. */
1063   bb->jumpkind = Ijk_Boring;
1064   bb->next = IRExpr_Binop(Iop_And32, IRExpr_RdTmp(old_LR), mkU32(~3));
1065
1066#  else
1067#    error Platform is not TOC-afflicted, fortunately
1068#  endif
1069}
1070
1071static
1072Bool mk_preamble__ppctoc_magic_return_stub ( void* closureV, IRSB* bb )
1073{
1074   VgCallbackClosure* closure = (VgCallbackClosure*)closureV;
1075   /* Since we're creating the entire IRSB right here, give it a
1076      proper IMark, as it won't get one any other way, and cachegrind
1077      will barf if it doesn't have one (fair enough really). */
1078   addStmtToIRSB( bb, IRStmt_IMark( closure->readdr, 4 ) );
1079   /* Generate the magic sequence:
1080         pop R2 from hidden stack
1081         pop LR from hidden stack
1082         goto LR
1083   */
1084   gen_pop_R2_LR_then_bLR(bb);
1085   return True; /* True == this is the entire BB; don't disassemble any
1086                   real insns into it - just hand it directly to
1087                   optimiser/instrumenter/backend. */
1088}
1089#endif
1090
1091/* --------------- END helpers for with-TOC platforms --------------- */
1092
1093
1094/* This is the IR preamble generator used for replacement
1095   functions.  It adds code to set the guest_NRADDR{_GPR2} to zero
1096   (technically not necessary, but facilitates detecting mixups in
1097   which a replacement function has been erroneously declared using
1098   VG_REPLACE_FUNCTION_Z{U,Z} when instead it should have been written
1099   using VG_WRAP_FUNCTION_Z{U,Z}).
1100
1101   On with-TOC platforms the follow hacks are also done: LR and R2 are
1102   pushed onto a hidden stack, R2 is set to the correct value for the
1103   replacement function, and LR is set to point at the magic
1104   return-stub address.  Setting LR causes the return of the
1105   wrapped/redirected function to lead to our magic return stub, which
1106   restores LR and R2 from said stack and returns for real.
1107
1108   VG_(get_StackTrace_wrk) understands that the LR value may point to
1109   the return stub address, and that in that case it can get the real
1110   LR value from the hidden stack instead. */
1111static
1112Bool mk_preamble__set_NRADDR_to_zero ( void* closureV, IRSB* bb )
1113{
1114   Int nraddr_szB
1115      = sizeof(((VexGuestArchState*)0)->guest_NRADDR);
1116   vg_assert(nraddr_szB == 4 || nraddr_szB == 8);
1117   vg_assert(nraddr_szB == VG_WORDSIZE);
1118   addStmtToIRSB(
1119      bb,
1120      IRStmt_Put(
1121         offsetof(VexGuestArchState,guest_NRADDR),
1122         nraddr_szB == 8 ? mkU64(0) : mkU32(0)
1123      )
1124   );
1125#  if defined(VG_PLAT_USES_PPCTOC)
1126   { VgCallbackClosure* closure = (VgCallbackClosure*)closureV;
1127     addStmtToIRSB(
1128        bb,
1129        IRStmt_Put(
1130           offsetof(VexGuestArchState,guest_NRADDR_GPR2),
1131           VG_WORDSIZE==8 ? mkU64(0) : mkU32(0)
1132        )
1133     );
1134     gen_push_and_set_LR_R2 ( bb, VG_(get_tocptr)( closure->readdr ) );
1135   }
1136#  endif
1137   return False;
1138}
1139
1140/* Ditto, except set guest_NRADDR to nraddr (the un-redirected guest
1141   address).  This is needed for function wrapping - so the wrapper
1142   can read _NRADDR and find the address of the function being
1143   wrapped.  On toc-afflicted platforms we must also snarf r2. */
1144static
1145Bool mk_preamble__set_NRADDR_to_nraddr ( void* closureV, IRSB* bb )
1146{
1147   VgCallbackClosure* closure = (VgCallbackClosure*)closureV;
1148   Int nraddr_szB
1149      = sizeof(((VexGuestArchState*)0)->guest_NRADDR);
1150   vg_assert(nraddr_szB == 4 || nraddr_szB == 8);
1151   vg_assert(nraddr_szB == VG_WORDSIZE);
1152   addStmtToIRSB(
1153      bb,
1154      IRStmt_Put(
1155         offsetof(VexGuestArchState,guest_NRADDR),
1156         nraddr_szB == 8
1157            ? IRExpr_Const(IRConst_U64( closure->nraddr ))
1158            : IRExpr_Const(IRConst_U32( (UInt)closure->nraddr ))
1159      )
1160   );
1161#  if defined(VGP_ppc64_linux) || defined(VGP_ppc32_aix5) \
1162                               || defined(VGP_ppc64_aix5)
1163   addStmtToIRSB(
1164      bb,
1165      IRStmt_Put(
1166         offsetof(VexGuestArchState,guest_NRADDR_GPR2),
1167         IRExpr_Get(offsetof(VexGuestArchState,guest_GPR2),
1168                    VG_WORDSIZE==8 ? Ity_I64 : Ity_I32)
1169      )
1170   );
1171   gen_push_and_set_LR_R2 ( bb, VG_(get_tocptr)( closure->readdr ) );
1172#  endif
1173   return False;
1174}
1175
1176/* --- Helpers to do with PPC related stack redzones. --- */
1177
1178__attribute__((unused))
1179static Bool const_True ( Addr64 guest_addr )
1180{
1181   return True;
1182}
1183
1184__attribute__((unused))
1185static Bool bl_RZ_zap_ok_for_AIX ( Addr64 bl_target )
1186{
1187   /* paranoia */
1188   if (sizeof(void*) == 4)
1189      bl_target &= 0xFFFFFFFFULL;
1190
1191   /* don't zap the redzone for calls to millicode. */
1192   if (bl_target < 0x10000ULL)
1193      return False;
1194
1195   /* don't zap the redzone for calls to .$SAVEF14 .. .$SAVEF31.
1196      First we need to be reasonably sure we won't segfault by looking
1197      at the branch target. */
1198   { NSegment const*const seg = VG_(am_find_nsegment)( (Addr)bl_target );
1199     if (seg && seg->hasR) {
1200        switch ( *(UInt*)(Addr)bl_target ) {
1201           case 0xd9c1ff70: /* stfd f14,-144(r1) */
1202           case 0xd9e1ff78: /* stfd f15,-136(r1) */
1203           case 0xda01ff80: /* stfd f16,-128(r1) */
1204           case 0xda21ff88: /* stfd f17,-120(r1) */
1205           case 0xda41ff90: /* stfd f18,-112(r1) */
1206           case 0xda61ff98: /* stfd f19,-104(r1) */
1207           case 0xda81ffa0: /* stfd f20,-96(r1) */
1208           case 0xdaa1ffa8: /* stfd f21,-88(r1) */
1209           case 0xdac1ffb0: /* stfd f22,-80(r1) */
1210           case 0xdae1ffb8: /* stfd f23,-72(r1) */
1211           case 0xdb01ffc0: /* stfd f24,-64(r1) */
1212           case 0xdb21ffc8: /* stfd f25,-56(r1) */
1213           case 0xdb41ffd0: /* stfd f26,-48(r1) */
1214           case 0xdb61ffd8: /* stfd f27,-40(r1) */
1215           case 0xdb81ffe0: /* stfd f28,-32(r1) */
1216           case 0xdba1ffe8: /* stfd f29,-24(r1) */
1217           case 0xdbc1fff0: /* stfd f30,-16(r1) */
1218           case 0xdbe1fff8: /* stfd f31,-8(r1) */
1219              return False;
1220        }
1221     }
1222   }
1223   return True;
1224}
1225
1226/* --------------- main translation function --------------- */
1227
1228/* Note: see comments at top of m_redir.c for the Big Picture on how
1229   redirections are managed. */
1230
/* The kind of translation being made.  VG_(translate) chooses one of
   these from its allow_redirection argument and the result of the
   redirection lookup, and uses it to pick the preamble generator and
   the translation table that receives the result. */
typedef
   enum {
      /* normal translation, redir neither requested nor inhibited */
      T_Normal,
      /* redir translation, function-wrap (set _NRADDR) style */
      T_Redir_Wrap,
      /* redir translation, replacement (don't set _NRADDR) style */
      T_Redir_Replace,
      /* a translation in which redir is specifically disallowed */
      T_NoRedir
   }
   T_Kind;
1243
1244/* Translate the basic block beginning at NRADDR, and add it to the
1245   translation cache & translation table.  Unless
1246   DEBUGGING_TRANSLATION is true, in which case the call is being done
1247   for debugging purposes, so (a) throw away the translation once it
1248   is made, and (b) produce a load of debugging output.  If
1249   ALLOW_REDIRECTION is False, do not attempt redirection of NRADDR,
1250   and also, put the resulting translation into the no-redirect tt/tc
1251   instead of the normal one.
1252
1253   TID is the identity of the thread requesting this translation.
1254*/
1255
1256Bool VG_(translate) ( ThreadId tid,
1257                      Addr64   nraddr,
1258                      Bool     debugging_translation,
1259                      Int      debugging_verbosity,
1260                      ULong    bbs_done,
1261                      Bool     allow_redirection )
1262{
1263   Addr64             addr;
1264   T_Kind             kind;
1265   Int                tmpbuf_used, verbosity, i;
1266   Bool               notrace_until_done, do_self_check;
1267   UInt               notrace_until_limit = 0;
1268   Bool (*preamble_fn)(void*,IRSB*);
1269   VexArch            vex_arch;
1270   VexArchInfo        vex_archinfo;
1271   VexAbiInfo         vex_abiinfo;
1272   VexGuestExtents    vge;
1273   VexTranslateArgs   vta;
1274   VexTranslateResult tres;
1275   VgCallbackClosure  closure;
1276
1277   /* Make sure Vex is initialised right. */
1278
1279   static Bool vex_init_done = False;
1280
1281   if (!vex_init_done) {
1282      LibVEX_Init ( &failure_exit, &log_bytes,
1283                    1,     /* debug_paranoia */
1284                    False, /* valgrind support */
1285                    &VG_(clo_vex_control) );
1286      vex_init_done = True;
1287   }
1288
1289   /* Establish the translation kind and actual guest address to
1290      start from.  Sets (addr,kind). */
1291   if (allow_redirection) {
1292      Bool isWrap;
1293      Addr64 tmp = VG_(redir_do_lookup)( nraddr, &isWrap );
1294      if (tmp == nraddr) {
1295         /* no redirection found */
1296         addr = nraddr;
1297         kind = T_Normal;
1298      } else {
1299         /* found a redirect */
1300         addr = tmp;
1301         kind = isWrap ? T_Redir_Wrap : T_Redir_Replace;
1302      }
1303   } else {
1304      addr = nraddr;
1305      kind = T_NoRedir;
1306   }
1307
1308   /* Established: (nraddr, addr, kind) */
1309
1310   /* Printing redirection info. */
1311
1312   if ((kind == T_Redir_Wrap || kind == T_Redir_Replace)
1313       && (VG_(clo_verbosity) >= 2 || VG_(clo_trace_redir))) {
1314      Bool ok;
1315      Char name1[64] = "";
1316      Char name2[64] = "";
1317      name1[0] = name2[0] = 0;
1318      ok = VG_(get_fnname_w_offset)(nraddr, name1, 64);
1319      if (!ok) VG_(strcpy)(name1, "???");
1320      ok = VG_(get_fnname_w_offset)(addr, name2, 64);
1321      if (!ok) VG_(strcpy)(name2, "???");
1322      VG_(message)(Vg_DebugMsg,
1323                   "REDIR: 0x%llx (%s) redirected to 0x%llx (%s)\n",
1324                   nraddr, name1,
1325                   addr, name2 );
1326   }
1327
1328   /* If codegen tracing, don't start tracing until
1329      notrace_until_limit blocks have gone by.  This avoids printing
1330      huge amounts of useless junk when all we want to see is the last
1331      few blocks translated prior to a failure.  Set
1332      notrace_until_limit to be the number of translations to be made
1333      before --trace-codegen= style printing takes effect. */
1334   notrace_until_done
1335      = VG_(get_bbs_translated)() >= notrace_until_limit;
1336
1337   if (!debugging_translation)
1338      VG_TRACK( pre_mem_read, Vg_CoreTranslate,
1339                              tid, "(translator)", addr, 1 );
1340
1341   /* If doing any code printing, print a basic block start marker */
1342   if (VG_(clo_trace_flags) || debugging_translation) {
1343      Char fnname[64] = "";
1344      VG_(get_fnname_w_offset)(addr, fnname, 64);
1345      VG_(printf)(
1346              "==== SB %d [tid %d] %s(0x%llx) SBs exec'd %lld ====\n",
1347              VG_(get_bbs_translated)(), (Int)tid, fnname, addr,
1348              bbs_done);
1349   }
1350
1351   /* Are we allowed to translate here? */
1352
1353   { /* BEGIN new scope specially for 'seg' */
1354   NSegment const* seg = VG_(am_find_nsegment)(addr);
1355
1356   if ( (!translations_allowable_from_seg(seg))
1357        || addr == TRANSTAB_BOGUS_GUEST_ADDR ) {
1358      if (VG_(clo_trace_signals))
1359         VG_(message)(Vg_DebugMsg, "translations not allowed here (0x%llx)"
1360                                   " - throwing SEGV\n", addr);
1361      /* U R busted, sonny.  Place your hands on your head and step
1362         away from the orig_addr. */
1363      /* Code address is bad - deliver a signal instead */
1364      if (seg != NULL) {
1365         /* There's some kind of segment at the requested place, but we
1366            aren't allowed to execute code here. */
1367         VG_(synth_fault_perms)(tid, addr);
1368      } else {
1369        /* There is no segment at all; we are attempting to execute in
1370           the middle of nowhere. */
1371         VG_(synth_fault_mapping)(tid, addr);
1372      }
1373      return False;
1374   }
1375
1376   /* Do we want a self-checking translation? */
1377   do_self_check = self_check_required( seg, tid );
1378
1379   /* True if a debug trans., or if bit N set in VG_(clo_trace_codegen). */
1380   verbosity = 0;
1381   if (debugging_translation) {
1382      verbosity = debugging_verbosity;
1383   }
1384   else
1385   if ( (VG_(clo_trace_flags) > 0
1386        && VG_(get_bbs_translated)() >= VG_(clo_trace_notbelow) )) {
1387      verbosity = VG_(clo_trace_flags);
1388   }
1389
1390   /* Figure out which preamble-mangling callback to send. */
1391   preamble_fn = NULL;
1392   if (kind == T_Redir_Replace)
1393      preamble_fn = mk_preamble__set_NRADDR_to_zero;
1394   else
1395   if (kind == T_Redir_Wrap)
1396      preamble_fn = mk_preamble__set_NRADDR_to_nraddr;
1397
1398#  if defined(VG_PLAT_USES_PPCTOC)
1399   if (ULong_to_Ptr(nraddr)
1400       == (void*)&VG_(ppctoc_magic_redirect_return_stub)) {
1401      /* If entering the special return stub, this means a wrapped or
1402         redirected function is returning.  Make this translation one
1403         which restores R2 and LR from the thread's hidden redir
1404         stack, and branch to the (restored) link register, thereby
1405         really causing the function to return. */
1406      vg_assert(kind == T_Normal);
1407      vg_assert(nraddr == addr);
1408      preamble_fn = mk_preamble__ppctoc_magic_return_stub;
1409   }
1410#  endif
1411
1412   /* ------ Actually do the translation. ------ */
1413   tl_assert2(VG_(tdict).tool_instrument,
1414              "you forgot to set VgToolInterface function 'tool_instrument'");
1415
1416   /* Get the CPU info established at startup. */
1417   VG_(machine_get_VexArchInfo)( &vex_arch, &vex_archinfo );
1418
1419   /* Set up 'abiinfo' structure with stuff Vex needs to know about
1420      the guest and host ABIs. */
1421
1422   LibVEX_default_VexAbiInfo( &vex_abiinfo );
1423   vex_abiinfo.guest_stack_redzone_size = VG_STACK_REDZONE_SZB;
1424
1425#  if defined(VGP_amd64_linux)
1426   vex_abiinfo.guest_amd64_assume_fs_is_zero  = True;
1427#  endif
1428#  if defined(VGP_amd64_darwin)
1429   vex_abiinfo.guest_amd64_assume_gs_is_0x60  = True;
1430#  endif
1431#  if defined(VGP_ppc32_linux)
1432   vex_abiinfo.guest_ppc_zap_RZ_at_blr        = False;
1433   vex_abiinfo.guest_ppc_zap_RZ_at_bl         = NULL;
1434   vex_abiinfo.host_ppc32_regalign_int64_args = True;
1435#  endif
1436#  if defined(VGP_ppc64_linux)
1437   vex_abiinfo.guest_ppc_zap_RZ_at_blr        = True;
1438   vex_abiinfo.guest_ppc_zap_RZ_at_bl         = const_True;
1439   vex_abiinfo.host_ppc_calls_use_fndescrs    = True;
1440#  endif
1441#  if defined(VGP_ppc32_aix5) || defined(VGP_ppc64_aix5)
1442   vex_abiinfo.guest_ppc_zap_RZ_at_blr        = False;
1443   vex_abiinfo.guest_ppc_zap_RZ_at_bl         = bl_RZ_zap_ok_for_AIX;
1444   vex_abiinfo.guest_ppc_sc_continues_at_LR   = True;
1445   vex_abiinfo.host_ppc_calls_use_fndescrs    = True;
1446#  endif
1447
1448   /* Set up closure args. */
1449   closure.tid    = tid;
1450   closure.nraddr = nraddr;
1451   closure.readdr = addr;
1452
1453   /* Set up args for LibVEX_Translate. */
1454   vta.arch_guest       = vex_arch;
1455   vta.archinfo_guest   = vex_archinfo;
1456   vta.arch_host        = vex_arch;
1457   vta.archinfo_host    = vex_archinfo;
1458   vta.abiinfo_both     = vex_abiinfo;
1459   vta.guest_bytes      = (UChar*)ULong_to_Ptr(addr);
1460   vta.guest_bytes_addr = (Addr64)addr;
1461   vta.callback_opaque  = (void*)&closure;
1462   vta.chase_into_ok    = chase_into_ok;
1463   vta.preamble_function = preamble_fn;
1464   vta.guest_extents    = &vge;
1465   vta.host_bytes       = tmpbuf;
1466   vta.host_bytes_size  = N_TMPBUF;
1467   vta.host_bytes_used  = &tmpbuf_used;
1468   { /* At this point we have to reconcile Vex's view of the
1469        instrumentation callback - which takes a void* first argument
1470        - with Valgrind's view, in which the first arg is a
1471        VgCallbackClosure*.  Hence the following longwinded casts.
1472        They are entirely legal but longwinded so as to maximise the
1473        chance of the C typechecker picking up any type snafus. */
1474     IRSB*(*f)(VgCallbackClosure*,
1475               IRSB*,VexGuestLayout*,VexGuestExtents*,
1476               IRType,IRType)
1477       = VG_(tdict).tool_instrument;
1478     IRSB*(*g)(void*,
1479               IRSB*,VexGuestLayout*,VexGuestExtents*,
1480               IRType,IRType)
1481       = (IRSB*(*)(void*,IRSB*,VexGuestLayout*,VexGuestExtents*,IRType,IRType))f;
1482     vta.instrument1    = g;
1483   }
1484   /* No need for type kludgery here. */
1485   vta.instrument2      = need_to_handle_SP_assignment()
1486                             ? vg_SP_update_pass
1487                             : NULL;
1488   vta.finaltidy        = VG_(needs).final_IR_tidy_pass
1489                             ? VG_(tdict).tool_final_IR_tidy_pass
1490                             : NULL;
1491   vta.do_self_check    = do_self_check;
1492   vta.traceflags       = verbosity;
1493
1494   /* Set up the dispatch-return info.  For archs without a link
1495      register, vex generates a jump back to the specified dispatch
1496      address.  Else, it just generates a branch-to-LR. */
1497#  if defined(VGA_x86) || defined(VGA_amd64)
1498   vta.dispatch
1499      = (!allow_redirection)
1500        ? /* It's a no-redir translation.  Will be run with the nonstandard
1501           dispatcher VG_(run_a_noredir_translation)
1502           and so needs a nonstandard return point. */
1503          (void*) &VG_(run_a_noredir_translation__return_point)
1504
1505        : /* normal translation.  Uses VG_(run_innerloop).  Return
1506             point depends on whether we're profiling bbs or not. */
1507          VG_(clo_profile_flags) > 0
1508          ? (void*) &VG_(run_innerloop__dispatch_profiled)
1509          : (void*) &VG_(run_innerloop__dispatch_unprofiled);
1510#  elif defined(VGA_ppc32) || defined(VGA_ppc64) \
1511        || defined(VGA_arm)
1512   vta.dispatch = NULL;
1513#  else
1514#    error "Unknown arch"
1515#  endif
1516
1517   /* Sheesh.  Finally, actually _do_ the translation! */
1518   tres = LibVEX_Translate ( &vta );
1519
1520   vg_assert(tres == VexTransOK);
1521   vg_assert(tmpbuf_used <= N_TMPBUF);
1522   vg_assert(tmpbuf_used > 0);
1523
1524   /* Tell aspacem of all segments that have had translations taken
1525      from them.  Optimisation: don't re-look up vge.base[0] since seg
1526      should already point to it. */
1527
1528   vg_assert( vge.base[0] == (Addr64)addr );
1529   /* set 'translations taken from this segment' flag */
1530   VG_(am_set_segment_hasT_if_SkFileC_or_SkAnonC)( (NSegment*)seg );
1531   } /* END new scope specially for 'seg' */
1532
1533   for (i = 1; i < vge.n_used; i++) {
1534      NSegment const* seg
1535         = VG_(am_find_nsegment)( vge.base[i] );
1536      /* set 'translations taken from this segment' flag */
1537      VG_(am_set_segment_hasT_if_SkFileC_or_SkAnonC)( (NSegment*)seg );
1538   }
1539
1540   /* Copy data at trans_addr into the translation cache. */
1541   vg_assert(tmpbuf_used > 0 && tmpbuf_used < 65536);
1542
1543   // If debugging, don't do anything with the translated block;  we
1544   // only did this for the debugging output produced along the way.
1545   if (!debugging_translation) {
1546
1547      if (kind != T_NoRedir) {
1548          // Put it into the normal TT/TC structures.  This is the
1549          // normal case.
1550
1551          // Note that we use nraddr (the non-redirected address), not
1552          // addr, which might have been changed by the redirection
1553          VG_(add_to_transtab)( &vge,
1554                                nraddr,
1555                                (Addr)(&tmpbuf[0]),
1556                                tmpbuf_used,
1557                                do_self_check );
1558      } else {
1559          VG_(add_to_unredir_transtab)( &vge,
1560                                        nraddr,
1561                                        (Addr)(&tmpbuf[0]),
1562                                        tmpbuf_used );
1563      }
1564   }
1565
1566   return True;
1567}
1568
1569/*--------------------------------------------------------------------*/
1570/*--- end                                                          ---*/
1571/*--------------------------------------------------------------------*/
1572