
/*---------------------------------------------------------------*/
/*--- begin                                       test_main.c ---*/
/*---------------------------------------------------------------*/

/*
   This file is part of Valgrind, a dynamic binary instrumentation
   framework.

   Copyright (C) 2004-2013 OpenWorks LLP
      info@open-works.net

   This program is free software; you can redistribute it and/or
   modify it under the terms of the GNU General Public License as
   published by the Free Software Foundation; either version 2 of the
   License, or (at your option) any later version.

   This program is distributed in the hope that it will be useful, but
   WITHOUT ANY WARRANTY; without even the implied warranty of
   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
   General Public License for more details.

   You should have received a copy of the GNU General Public License
   along with this program; if not, write to the Free Software
   Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA
   02110-1301, USA.

   The GNU General Public License is contained in the file COPYING.

   Neither the names of the U.S. Department of Energy nor the
   University of California nor the names of its contributors may be
   used to endorse or promote products derived from this software
   without prior written permission.
*/

#include <stdio.h>
#include <stdlib.h>
#include <assert.h>
#include <string.h>

#include "libvex_basictypes.h"
#include "libvex.h"

#include "test_main.h"


/*---------------------------------------------------------------*/
/*--- Test                                                    ---*/
/*---------------------------------------------------------------*/


__attribute__ ((noreturn))
static
void failure_exit ( void )
{
   fprintf(stdout, "VEX did failure_exit.  Bye.\n");
   exit(1);
}

static
void log_bytes ( HChar* bytes, Int nbytes )
{
   fwrite ( bytes, 1, nbytes, stdout );
}

#define N_LINEBUF 10000
static HChar linebuf[N_LINEBUF];

#define N_ORIGBUF 10000
#define N_TRANSBUF 5000

static UChar origbuf[N_ORIGBUF];
static UChar transbuf[N_TRANSBUF];

static Bool verbose = True;

/* Forwards */
#if 1 /* UNUSED */
//static IRSB* ac_instrument ( IRSB*, VexGuestLayout*, IRType );
static
IRSB* mc_instrument ( void* closureV,
                      IRSB* bb_in, VexGuestLayout* layout,
                      VexGuestExtents* vge,
                      IRType gWordTy, IRType hWordTy );
#endif

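/* Refuse all chasing: the harness hands the front end exactly one
   BB's worth of bytes, so following branches into other code would
   read outside the buffer (see the comment before LibVEX_Init). */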
static Bool chase_into_not_ok ( void* opaque, Addr64 dst ) {
   return False;
}
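/* Never request self-checking translations. */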
static UInt needs_self_check ( void* opaque, VexGuestExtents* vge ) {
   return 0;
}

int main ( int argc, char** argv )
{
   FILE* f;
   Int i;
   UInt u, sum;
   Addr32 orig_addr;
   Int bb_number, n_bbs_done = 0;
   Int orig_nbytes, trans_used;
   VexTranslateResult tres;
   VexControl vcon;
   VexGuestExtents vge;
   VexArchInfo vai_x86, vai_amd64, vai_ppc32, vai_arm;
   VexAbiInfo vbi;
   VexTranslateArgs vta;

   if (argc != 2) {
      fprintf(stderr, "usage: vex file.orig\n");
      exit(1);
   }
   f = fopen(argv[1], "r");
   if (!f) {
      fprintf(stderr, "can't open `%s'\n", argv[1]);
      exit(1);
   }

   /* Run with default params.  However, we can't allow bb chasing
      since that causes the front end to get segfaults when it tries
      to read code outside the initial BB we hand it.  So when calling
      LibVEX_Translate, send in a chase-into predicate that always
      returns False. */
   LibVEX_default_VexControl ( &vcon );
   vcon.iropt_level = 2;
   vcon.guest_max_insns = 60;

   LibVEX_Init ( &failure_exit, &log_bytes,
                 1,  /* debug_paranoia */
                 TEST_VSUPPORT, /* valgrind support */
                 &vcon );


   while (!feof(f)) {

      __attribute__((unused))
      char* unused1 = fgets(linebuf, N_LINEBUF, f);
      if (linebuf[0] == 0) continue;
      if (linebuf[0] != '.') continue;

      if (n_bbs_done == TEST_N_BBS) break;
      n_bbs_done++;

      /* first line is:   . bb-number bb-addr n-bytes */
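      /* e.g. a hypothetical input line:  ". 1 8048400 12"
         (the actual values come from the supplied .orig file). */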
      assert(3 == sscanf(&linebuf[1], " %d %x %d\n",
                                 & bb_number,
                                 & orig_addr, & orig_nbytes ));
      assert(orig_nbytes >= 1);
      assert(!feof(f));
      __attribute__((unused))
      char* unused2 = fgets(linebuf, N_LINEBUF, f);
      assert(linebuf[0] == '.');

      /* second line is:   . byte byte byte etc */
      if (verbose)
         printf("============ Basic Block %d, Done %d, "
                "Start %x, nbytes %2d ============",
                bb_number, n_bbs_done-1, orig_addr, orig_nbytes);

      /* thumb ITstate analysis needs to examine the 18 bytes
         preceding the first instruction.  So let's leave the first 18
         zeroed out. */
      memset(origbuf, 0, sizeof(origbuf));

      assert(orig_nbytes >= 1 && orig_nbytes <= N_ORIGBUF);
      for (i = 0; i < orig_nbytes; i++) {
         assert(1 == sscanf(&linebuf[2 + 3*i], "%x", &u));
         origbuf[18+ i] = (UChar)u;
      }

      /* FIXME: put sensible values into the .hwcaps fields */
      LibVEX_default_VexArchInfo(&vai_x86);
      vai_x86.hwcaps = VEX_HWCAPS_X86_SSE1
                       | VEX_HWCAPS_X86_SSE2 | VEX_HWCAPS_X86_SSE3;

      LibVEX_default_VexArchInfo(&vai_amd64);
      vai_amd64.hwcaps = 0;

      LibVEX_default_VexArchInfo(&vai_ppc32);
      vai_ppc32.hwcaps = 0;
      vai_ppc32.ppc_icache_line_szB = 128;

      LibVEX_default_VexArchInfo(&vai_arm);
      vai_arm.hwcaps = VEX_HWCAPS_ARM_VFP3 | VEX_HWCAPS_ARM_NEON | 7;

      LibVEX_default_VexAbiInfo(&vbi);
      vbi.guest_stack_redzone_size = 128;

      /* ----- Set up args for LibVEX_Translate ----- */

      vta.abiinfo_both    = vbi;
      vta.guest_bytes     = &origbuf[18];
      vta.guest_bytes_addr = (Addr64)orig_addr;
      vta.callback_opaque = NULL;
      vta.chase_into_ok   = chase_into_not_ok;
      vta.guest_extents   = &vge;
      vta.host_bytes      = transbuf;
      vta.host_bytes_size = N_TRANSBUF;
      vta.host_bytes_used = &trans_used;

#if 0 /* ppc32 -> ppc32 */
      vta.arch_guest     = VexArchPPC32;
      vta.archinfo_guest = vai_ppc32;
      vta.arch_host      = VexArchPPC32;
      vta.archinfo_host  = vai_ppc32;
#endif
#if 0 /* amd64 -> amd64 */
      vta.arch_guest     = VexArchAMD64;
      vta.archinfo_guest = vai_amd64;
      vta.arch_host      = VexArchAMD64;
      vta.archinfo_host  = vai_amd64;
#endif
#if 0 /* x86 -> x86 */
      vta.arch_guest     = VexArchX86;
      vta.archinfo_guest = vai_x86;
      vta.arch_host      = VexArchX86;
      vta.archinfo_host  = vai_x86;
#endif
#if 1 /* arm -> arm */
      vta.arch_guest     = VexArchARM;
      vta.archinfo_guest = vai_arm;
      vta.arch_host      = VexArchARM;
      vta.archinfo_host  = vai_arm;
      /* ARM/Thumb-only hacks, needed to keep the ITstate analyser
         in the front end happy. */
      vta.guest_bytes     = &origbuf[18 +1];
      vta.guest_bytes_addr = (Addr64)(&origbuf[18 +1]);
#endif

#if 1 /* no instrumentation */
      vta.instrument1     = NULL;
      vta.instrument2     = NULL;
#endif
#if 0 /* addrcheck */
      vta.instrument1     = ac_instrument;
      vta.instrument2     = NULL;
#endif
#if 0 /* memcheck */
      vta.instrument1     = mc_instrument;
      vta.instrument2     = NULL;
#endif
      vta.needs_self_check  = needs_self_check;
      vta.preamble_function = NULL;
      vta.traceflags      = TEST_FLAGS;
      vta.addProfInc      = False;
      vta.sigill_diag     = True;

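      /* Continuation-point addresses.  The translations made by this
         harness are never executed (we only sum the output bytes), so
         it appears any distinct dummy values will do here. */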
      vta.disp_cp_chain_me_to_slowEP = (void*)0x12345678;
      vta.disp_cp_chain_me_to_fastEP = (void*)0x12345679;
      vta.disp_cp_xindir             = (void*)0x1234567A;
      vta.disp_cp_xassisted          = (void*)0x1234567B;

      vta.finaltidy = NULL;

      for (i = 0; i < TEST_N_ITERS; i++)
         tres = LibVEX_Translate ( &vta );

      if (tres.status != VexTransOK)
         printf("\ntres = %d\n", (Int)tres.status);
      assert(tres.status == VexTransOK);
      assert(tres.n_sc_extents == 0);
      assert(vge.n_used == 1);
      assert((UInt)(vge.len[0]) == orig_nbytes);

      sum = 0;
      for (i = 0; i < trans_used; i++)
         sum += (UInt)transbuf[i];
      printf ( " %6.2f ... %u\n",
               (double)trans_used / (double)vge.len[0], sum );
   }

   fclose(f);
   printf("\n");
   LibVEX_ShowAllocStats();

   return 0;
}

//////////////////////////////////////////////////////////////////////
//////////////////////////////////////////////////////////////////////
//////////////////////////////////////////////////////////////////////
//////////////////////////////////////////////////////////////////////
//////////////////////////////////////////////////////////////////////
//////////////////////////////////////////////////////////////////////
//////////////////////////////////////////////////////////////////////
//////////////////////////////////////////////////////////////////////

#if 0 /* UNUSED */

static
__attribute((noreturn))
void panic ( HChar* s )
{
  printf("\npanic: %s\n", s);
  failure_exit();
}

static
IRSB* ac_instrument (IRSB* bb_in, VexGuestLayout* layout, IRType hWordTy )
{
/* Use this rather than eg. -1 because it's a UInt. */
#define INVALID_DATA_SIZE   999999

   Int       i;
   Int       sz;
   IRCallee* helper;
   IRStmt*   st;
   IRExpr*   data;
   IRExpr*   addr;
   Bool      needSz;

   /* Set up BB */
   IRSB* bb     = emptyIRSB();
   bb->tyenv    = dopyIRTypeEnv(bb_in->tyenv);
   bb->next     = dopyIRExpr(bb_in->next);
   bb->jumpkind = bb_in->jumpkind;

   /* No loads to consider in ->next. */
   assert(isIRAtom(bb_in->next));

   for (i = 0; i < bb_in->stmts_used; i++) {
      st = bb_in->stmts[i];
      if (!st) continue;

      switch (st->tag) {

         case Ist_Tmp:
            data = st->Ist.Tmp.data;
            if (data->tag == Iex_LDle) {
               addr = data->Iex.LDle.addr;
               sz = sizeofIRType(data->Iex.LDle.ty);
               needSz = False;
               switch (sz) {
                  case 4: helper = mkIRCallee(1, "ac_helperc_LOAD4",
                                                 (void*)0x12345601); break;
                  case 2: helper = mkIRCallee(0, "ac_helperc_LOAD2",
                                                 (void*)0x12345602); break;
                  case 1: helper = mkIRCallee(1, "ac_helperc_LOAD1",
                                                 (void*)0x12345603); break;
                  default: helper = mkIRCallee(0, "ac_helperc_LOADN",
                                                  (void*)0x12345604);
                           needSz = True; break;
               }
               if (needSz) {
                  addStmtToIRSB(
                     bb,
                     IRStmt_Dirty(
                        unsafeIRDirty_0_N( helper->regparms,
                                           helper->name, helper->addr,
                                           mkIRExprVec_2(addr, mkIRExpr_HWord(sz)))
                  ));
               } else {
                  addStmtToIRSB(
                     bb,
                     IRStmt_Dirty(
                        unsafeIRDirty_0_N( helper->regparms,
                                           helper->name, helper->addr,
                                           mkIRExprVec_1(addr) )
                  ));
               }
            }
            break;

         case Ist_STle:
            data = st->Ist.STle.data;
            addr = st->Ist.STle.addr;
            assert(isIRAtom(data));
            assert(isIRAtom(addr));
            sz = sizeofIRType(typeOfIRExpr(bb_in->tyenv, data));
            needSz = False;
            switch (sz) {
               case 4: helper = mkIRCallee(1, "ac_helperc_STORE4",
                                              (void*)0x12345605); break;
               case 2: helper = mkIRCallee(0, "ac_helperc_STORE2",
                                              (void*)0x12345606); break;
               case 1: helper = mkIRCallee(1, "ac_helperc_STORE1",
                                              (void*)0x12345607); break;
               default: helper = mkIRCallee(0, "ac_helperc_STOREN",
                                               (void*)0x12345608);
                        needSz = True; break;
            }
            if (needSz) {
               addStmtToIRSB(
                  bb,
                  IRStmt_Dirty(
                     unsafeIRDirty_0_N( helper->regparms,
                                        helper->name, helper->addr,
                                        mkIRExprVec_2(addr, mkIRExpr_HWord(sz)))
               ));
            } else {
               addStmtToIRSB(
                  bb,
                  IRStmt_Dirty(
                     unsafeIRDirty_0_N( helper->regparms,
                                        helper->name, helper->addr,
                                        mkIRExprVec_1(addr) )
               ));
            }
            break;

         case Ist_Put:
            assert(isIRAtom(st->Ist.Put.data));
            break;

         case Ist_PutI:
            assert(isIRAtom(st->Ist.PutI.ix));
            assert(isIRAtom(st->Ist.PutI.data));
            break;

         case Ist_Exit:
            assert(isIRAtom(st->Ist.Exit.guard));
            break;

         case Ist_Dirty:
            /* If the call doesn't interact with memory, we ain't
               interested. */
            if (st->Ist.Dirty.details->mFx == Ifx_None)
               break;
            goto unhandled;

         default:
         unhandled:
            printf("\n");
            ppIRStmt(st);
            printf("\n");
            panic("addrcheck: unhandled IRStmt");
      }

      addStmtToIRSB( bb, dopyIRStmt(st));
   }

   return bb;
}
#endif /* UNUSED */

//////////////////////////////////////////////////////////////////////
//////////////////////////////////////////////////////////////////////
//////////////////////////////////////////////////////////////////////
//////////////////////////////////////////////////////////////////////
//////////////////////////////////////////////////////////////////////
//////////////////////////////////////////////////////////////////////
//////////////////////////////////////////////////////////////////////
//////////////////////////////////////////////////////////////////////

#if 1 /* UNUSED */

static
__attribute((noreturn))
void panic ( HChar* s )
{
  printf("\npanic: %s\n", s);
  failure_exit();
}

#define tl_assert(xxx) assert(xxx)
#define VG_(xxxx) xxxx
#define tool_panic(zzz) panic(zzz)
#define MC_(zzzz) MC_##zzzz
#define TL_(zzzz) SK_##zzzz

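/* Stubs standing in for the real memcheck helpers.  The instrumented
   code produced by this harness is never executed, so only the
   helpers' addresses are needed and empty bodies suffice. */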
static void MC_helperc_complain_undef ( void );
static void MC_helperc_LOADV8 ( void );
static void MC_helperc_LOADV4 ( void );
static void MC_helperc_LOADV2 ( void );
static void MC_helperc_LOADV1 ( void );
static void MC_helperc_STOREV8( void );
static void MC_helperc_STOREV4( void );
static void MC_helperc_STOREV2( void );
static void MC_helperc_STOREV1( void );
static void MC_helperc_value_check0_fail( void );
static void MC_helperc_value_check1_fail( void );
static void MC_helperc_value_check4_fail( void );

static void MC_helperc_complain_undef ( void ) { }
static void MC_helperc_LOADV8 ( void ) { }
static void MC_helperc_LOADV4 ( void ) { }
static void MC_helperc_LOADV2 ( void ) { }
static void MC_helperc_LOADV1 ( void ) { }
static void MC_helperc_STOREV8( void ) { }
static void MC_helperc_STOREV4( void ) { }
static void MC_helperc_STOREV2( void ) { }
static void MC_helperc_STOREV1( void ) { }
static void MC_helperc_value_check0_fail( void ) { }
static void MC_helperc_value_check1_fail( void ) { }
static void MC_helperc_value_check4_fail( void ) { }


/*--------------------------------------------------------------------*/
/*--- Instrument IR to perform memory checking operations.         ---*/
/*---                                               mc_translate.c ---*/
/*--------------------------------------------------------------------*/

/*
   This file is part of MemCheck, a heavyweight Valgrind tool for
   detecting memory errors.

   Copyright (C) 2000-2013 Julian Seward
      jseward@acm.org

   This program is free software; you can redistribute it and/or
   modify it under the terms of the GNU General Public License as
   published by the Free Software Foundation; either version 2 of the
   License, or (at your option) any later version.

   This program is distributed in the hope that it will be useful, but
   WITHOUT ANY WARRANTY; without even the implied warranty of
   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
   General Public License for more details.

   You should have received a copy of the GNU General Public License
   along with this program; if not, write to the Free Software
   Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA
   02111-1307, USA.

   The GNU General Public License is contained in the file COPYING.
*/

//#include "mc_include.h"


/*------------------------------------------------------------*/
/*--- Forward decls                                        ---*/
/*------------------------------------------------------------*/

struct _MCEnv;

static IRType  shadowType ( IRType ty );
static IRExpr* expr2vbits ( struct _MCEnv* mce, IRExpr* e );


/*------------------------------------------------------------*/
/*--- Memcheck running state, and tmp management.          ---*/
/*------------------------------------------------------------*/

/* Carries around state during memcheck instrumentation. */
typedef
   struct _MCEnv {
      /* MODIFIED: the bb being constructed.  IRStmts are added. */
      IRSB* bb;

      /* MODIFIED: a table [0 .. #temps_in_original_bb-1] which maps
         original temps to their current shadow temp.  Initially all
         entries are IRTemp_INVALID.  Entries are added lazily since
         many original temps are not used due to optimisation prior
         to instrumentation.  Note that floating point original tmps
         are shadowed by integer tmps of the same size, and Bit-typed
         original tmps are shadowed by the type Ity_I8.  See comment
         below. */
      IRTemp* tmpMap;
      Int     n_originalTmps; /* for range checking */

      /* READONLY: the guest layout.  This indicates which parts of
         the guest state should be regarded as 'always defined'. */
      VexGuestLayout* layout;
      /* READONLY: the host word type.  Needed for constructing
         arguments of type 'HWord' to be passed to helper functions.
         Ity_I32 or Ity_I64 only. */
      IRType hWordTy;
   }
   MCEnv;

/* SHADOW TMP MANAGEMENT.  Shadow tmps are allocated lazily (on
   demand), as they are encountered.  This is for two reasons.

   (1) (less important reason): Many original tmps are unused due to
   initial IR optimisation, and we do not want to waste space in
   tables tracking them.

   Shadow IRTemps are therefore allocated on demand.  mce.tmpMap is a
   table indexed [0 .. n_types-1], which gives the current shadow for
   each original tmp, or INVALID_IRTEMP if none is so far assigned.
   It is necessary to support making multiple assignments to a shadow
   -- specifically, after testing a shadow for definedness, it needs
   to be made defined.  But IR's SSA property disallows this.

   (2) (more important reason): Therefore, when a shadow needs to get
   a new value, a new temporary is created, the value is assigned to
   that, and the tmpMap is updated to reflect the new binding.

   A corollary is that if the tmpMap maps a given tmp to
   INVALID_IRTEMP and we are hoping to read that shadow tmp, it means
   there's a read-before-write error in the original tmps.  The IR
   sanity checker should catch all such anomalies, however.
*/

/* Find the tmp currently shadowing the given original tmp.  If none
   so far exists, allocate one.  */
static IRTemp findShadowTmp ( MCEnv* mce, IRTemp orig )
{
   tl_assert(orig < mce->n_originalTmps);
   if (mce->tmpMap[orig] == IRTemp_INVALID) {
      mce->tmpMap[orig]
         = newIRTemp(mce->bb->tyenv,
                     shadowType(mce->bb->tyenv->types[orig]));
   }
   return mce->tmpMap[orig];
}

/* Allocate a new shadow for the given original tmp.  This means any
   previous shadow is abandoned.  This is needed because it is
   necessary to give a new value to a shadow once it has been tested
   for undefinedness, but unfortunately IR's SSA property disallows
   this.  Instead we must abandon the old shadow, allocate a new one
   and use that instead. */
static void newShadowTmp ( MCEnv* mce, IRTemp orig )
{
   tl_assert(orig < mce->n_originalTmps);
   mce->tmpMap[orig]
      = newIRTemp(mce->bb->tyenv,
                  shadowType(mce->bb->tyenv->types[orig]));
}


/*------------------------------------------------------------*/
/*--- IRAtoms -- a subset of IRExprs                       ---*/
/*------------------------------------------------------------*/

/* An atom is either an IRExpr_Const or an IRExpr_Tmp, as defined by
   isIRAtom() in libvex_ir.h.  Because this instrumenter expects flat
   input, most of this code deals in atoms.  Usefully, a value atom
   always has a V-value which is also an atom: constants are shadowed
   by constants, and temps are shadowed by the corresponding shadow
   temporary. */

typedef  IRExpr  IRAtom;

/* (used for sanity checks only): is this an atom which looks
   like it's from original code? */
static Bool isOriginalAtom ( MCEnv* mce, IRAtom* a1 )
{
   if (a1->tag == Iex_Const)
      return True;
   if (a1->tag == Iex_RdTmp && a1->Iex.RdTmp.tmp < mce->n_originalTmps)
      return True;
   return False;
}

/* (used for sanity checks only): is this an atom which looks
   like it's from shadow code? */
static Bool isShadowAtom ( MCEnv* mce, IRAtom* a1 )
{
   if (a1->tag == Iex_Const)
      return True;
   if (a1->tag == Iex_RdTmp && a1->Iex.RdTmp.tmp >= mce->n_originalTmps)
      return True;
   return False;
}

/* (used for sanity checks only): check that both args are atoms and
   are identically-kinded. */
static Bool sameKindedAtoms ( IRAtom* a1, IRAtom* a2 )
{
   if (a1->tag == Iex_RdTmp && a2->tag == Iex_RdTmp)
      return True;
   if (a1->tag == Iex_Const && a2->tag == Iex_Const)
      return True;
   return False;
}


/*------------------------------------------------------------*/
/*--- Type management                                      ---*/
/*------------------------------------------------------------*/

/* Shadow state is always accessed using integer types.  This returns
   an integer type with the same size (as per sizeofIRType) as the
   given type.  The only valid shadow types are Bit, I8, I16, I32,
   I64, V128. */

static IRType shadowType ( IRType ty )
{
   switch (ty) {
      case Ity_I1:
      case Ity_I8:
      case Ity_I16:
      case Ity_I32:
      case Ity_I64:  return ty;
      case Ity_F32:  return Ity_I32;
      case Ity_F64:  return Ity_I64;
      case Ity_V128: return Ity_V128;
      default: ppIRType(ty);
               VG_(tool_panic)("memcheck:shadowType");
   }
}

/* Produce a 'defined' value of the given shadow type.  Should only be
   supplied shadow types (Bit/I8/I16/I32/I64/V128). */
static IRExpr* definedOfType ( IRType ty ) {
   switch (ty) {
      case Ity_I1:   return IRExpr_Const(IRConst_U1(False));
      case Ity_I8:   return IRExpr_Const(IRConst_U8(0));
      case Ity_I16:  return IRExpr_Const(IRConst_U16(0));
      case Ity_I32:  return IRExpr_Const(IRConst_U32(0));
      case Ity_I64:  return IRExpr_Const(IRConst_U64(0));
      case Ity_V128: return IRExpr_Const(IRConst_V128(0x0000));
      default:      VG_(tool_panic)("memcheck:definedOfType");
   }
}


/*------------------------------------------------------------*/
/*--- Constructing IR fragments                            ---*/
/*------------------------------------------------------------*/

/* assign value to tmp */
#define assign(_bb,_tmp,_expr)   \
   addStmtToIRSB((_bb), IRStmt_WrTmp((_tmp),(_expr)))

/* add stmt to a bb */
#define stmt(_bb,_stmt)    \
   addStmtToIRSB((_bb), (_stmt))

/* build various kinds of expressions */
#define binop(_op, _arg1, _arg2) IRExpr_Binop((_op),(_arg1),(_arg2))
#define unop(_op, _arg)          IRExpr_Unop((_op),(_arg))
#define mkU8(_n)                 IRExpr_Const(IRConst_U8(_n))
#define mkU16(_n)                IRExpr_Const(IRConst_U16(_n))
#define mkU32(_n)                IRExpr_Const(IRConst_U32(_n))
#define mkU64(_n)                IRExpr_Const(IRConst_U64(_n))
#define mkV128(_n)               IRExpr_Const(IRConst_V128(_n))
#define mkexpr(_tmp)             IRExpr_RdTmp((_tmp))

/* bind the given expression to a new temporary, and return the
   temporary.  This effectively converts an arbitrary expression into
   an atom. */
static IRAtom* assignNew ( MCEnv* mce, IRType ty, IRExpr* e ) {
   IRTemp t = newIRTemp(mce->bb->tyenv, ty);
   assign(mce->bb, t, e);
   return mkexpr(t);
}


/*------------------------------------------------------------*/
/*--- Constructing definedness primitive ops               ---*/
/*------------------------------------------------------------*/

/* --------- Defined-if-either-defined --------- */
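
/* In the V-bit encoding, 0 means 'defined' and 1 means 'undefined',
   so a bitwise AND of two shadow values yields a bit that is defined
   (0) whenever either input bit is defined. */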

static IRAtom* mkDifD8 ( MCEnv* mce, IRAtom* a1, IRAtom* a2 ) {
   tl_assert(isShadowAtom(mce,a1));
   tl_assert(isShadowAtom(mce,a2));
   return assignNew(mce, Ity_I8, binop(Iop_And8, a1, a2));
}

static IRAtom* mkDifD16 ( MCEnv* mce, IRAtom* a1, IRAtom* a2 ) {
   tl_assert(isShadowAtom(mce,a1));
   tl_assert(isShadowAtom(mce,a2));
   return assignNew(mce, Ity_I16, binop(Iop_And16, a1, a2));
}

static IRAtom* mkDifD32 ( MCEnv* mce, IRAtom* a1, IRAtom* a2 ) {
   tl_assert(isShadowAtom(mce,a1));
   tl_assert(isShadowAtom(mce,a2));
   return assignNew(mce, Ity_I32, binop(Iop_And32, a1, a2));
}

static IRAtom* mkDifD64 ( MCEnv* mce, IRAtom* a1, IRAtom* a2 ) {
   tl_assert(isShadowAtom(mce,a1));
   tl_assert(isShadowAtom(mce,a2));
   return assignNew(mce, Ity_I64, binop(Iop_And64, a1, a2));
}

static IRAtom* mkDifDV128 ( MCEnv* mce, IRAtom* a1, IRAtom* a2 ) {
   tl_assert(isShadowAtom(mce,a1));
   tl_assert(isShadowAtom(mce,a2));
   return assignNew(mce, Ity_V128, binop(Iop_AndV128, a1, a2));
}

/* --------- Undefined-if-either-undefined --------- */
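
/* Dually, a bitwise OR of two shadow values yields a bit that is
   undefined (1) whenever either input bit is undefined. */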

static IRAtom* mkUifU8 ( MCEnv* mce, IRAtom* a1, IRAtom* a2 ) {
   tl_assert(isShadowAtom(mce,a1));
   tl_assert(isShadowAtom(mce,a2));
   return assignNew(mce, Ity_I8, binop(Iop_Or8, a1, a2));
}

static IRAtom* mkUifU16 ( MCEnv* mce, IRAtom* a1, IRAtom* a2 ) {
   tl_assert(isShadowAtom(mce,a1));
   tl_assert(isShadowAtom(mce,a2));
   return assignNew(mce, Ity_I16, binop(Iop_Or16, a1, a2));
}

static IRAtom* mkUifU32 ( MCEnv* mce, IRAtom* a1, IRAtom* a2 ) {
   tl_assert(isShadowAtom(mce,a1));
   tl_assert(isShadowAtom(mce,a2));
   return assignNew(mce, Ity_I32, binop(Iop_Or32, a1, a2));
}

static IRAtom* mkUifU64 ( MCEnv* mce, IRAtom* a1, IRAtom* a2 ) {
   tl_assert(isShadowAtom(mce,a1));
   tl_assert(isShadowAtom(mce,a2));
   return assignNew(mce, Ity_I64, binop(Iop_Or64, a1, a2));
}

static IRAtom* mkUifUV128 ( MCEnv* mce, IRAtom* a1, IRAtom* a2 ) {
   tl_assert(isShadowAtom(mce,a1));
   tl_assert(isShadowAtom(mce,a2));
   return assignNew(mce, Ity_V128, binop(Iop_OrV128, a1, a2));
}

static IRAtom* mkUifU ( MCEnv* mce, IRType vty, IRAtom* a1, IRAtom* a2 ) {
   switch (vty) {
      case Ity_I8:   return mkUifU8(mce, a1, a2);
      case Ity_I16:  return mkUifU16(mce, a1, a2);
      case Ity_I32:  return mkUifU32(mce, a1, a2);
      case Ity_I64:  return mkUifU64(mce, a1, a2);
      case Ity_V128: return mkUifUV128(mce, a1, a2);
      default:
         VG_(printf)("\n"); ppIRType(vty); VG_(printf)("\n");
         VG_(tool_panic)("memcheck:mkUifU");
   }
}

/* --------- The Left-family of operations. --------- */
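
/* mkLeftN(x) computes x | -x.  Since -x = ~x + 1, this smears the
   lowest 1 (undefined) bit of x upwards through all higher bit
   positions: a cheap over-approximation of how undefinedness
   propagates leftwards through carry chains. */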

static IRAtom* mkLeft8 ( MCEnv* mce, IRAtom* a1 ) {
   tl_assert(isShadowAtom(mce,a1));
   /* It's safe to duplicate a1 since it's only an atom */
   return assignNew(mce, Ity_I8,
                    binop(Iop_Or8, a1,
                          assignNew(mce, Ity_I8,
                                    /* unop(Iop_Neg8, a1)))); */
                                    binop(Iop_Sub8, mkU8(0), a1) )));
}

static IRAtom* mkLeft16 ( MCEnv* mce, IRAtom* a1 ) {
   tl_assert(isShadowAtom(mce,a1));
   /* It's safe to duplicate a1 since it's only an atom */
   return assignNew(mce, Ity_I16,
                    binop(Iop_Or16, a1,
                          assignNew(mce, Ity_I16,
                                    /* unop(Iop_Neg16, a1)))); */
                                    binop(Iop_Sub16, mkU16(0), a1) )));
}

static IRAtom* mkLeft32 ( MCEnv* mce, IRAtom* a1 ) {
   tl_assert(isShadowAtom(mce,a1));
   /* It's safe to duplicate a1 since it's only an atom */
   return assignNew(mce, Ity_I32,
                    binop(Iop_Or32, a1,
                          assignNew(mce, Ity_I32,
                                    /* unop(Iop_Neg32, a1)))); */
                                    binop(Iop_Sub32, mkU32(0), a1) )));
}

/* --------- 'Improvement' functions for AND/OR. --------- */

/* ImproveAND(data, vbits) = data OR vbits.  A data bit that is 0 and
   defined (vbit 0) gives defined (0); all other combinations give
   undefined (1).
*/
static IRAtom* mkImproveAND8 ( MCEnv* mce, IRAtom* data, IRAtom* vbits )
{
   tl_assert(isOriginalAtom(mce, data));
   tl_assert(isShadowAtom(mce, vbits));
   tl_assert(sameKindedAtoms(data, vbits));
   return assignNew(mce, Ity_I8, binop(Iop_Or8, data, vbits));
}

static IRAtom* mkImproveAND16 ( MCEnv* mce, IRAtom* data, IRAtom* vbits )
{
   tl_assert(isOriginalAtom(mce, data));
   tl_assert(isShadowAtom(mce, vbits));
   tl_assert(sameKindedAtoms(data, vbits));
   return assignNew(mce, Ity_I16, binop(Iop_Or16, data, vbits));
}

static IRAtom* mkImproveAND32 ( MCEnv* mce, IRAtom* data, IRAtom* vbits )
{
   tl_assert(isOriginalAtom(mce, data));
   tl_assert(isShadowAtom(mce, vbits));
   tl_assert(sameKindedAtoms(data, vbits));
   return assignNew(mce, Ity_I32, binop(Iop_Or32, data, vbits));
}

static IRAtom* mkImproveAND64 ( MCEnv* mce, IRAtom* data, IRAtom* vbits )
{
   tl_assert(isOriginalAtom(mce, data));
   tl_assert(isShadowAtom(mce, vbits));
   tl_assert(sameKindedAtoms(data, vbits));
   return assignNew(mce, Ity_I64, binop(Iop_Or64, data, vbits));
}

static IRAtom* mkImproveANDV128 ( MCEnv* mce, IRAtom* data, IRAtom* vbits )
{
   tl_assert(isOriginalAtom(mce, data));
   tl_assert(isShadowAtom(mce, vbits));
   tl_assert(sameKindedAtoms(data, vbits));
   return assignNew(mce, Ity_V128, binop(Iop_OrV128, data, vbits));
}

/* ImproveOR(data, vbits) = ~data OR vbits.  A data bit that is 1 and
   defined (vbit 0) gives defined (0); all other combinations give
   undefined (1).
*/
static IRAtom* mkImproveOR8 ( MCEnv* mce, IRAtom* data, IRAtom* vbits )
{
   tl_assert(isOriginalAtom(mce, data));
   tl_assert(isShadowAtom(mce, vbits));
   tl_assert(sameKindedAtoms(data, vbits));
   return assignNew(
             mce, Ity_I8,
             binop(Iop_Or8,
                   assignNew(mce, Ity_I8, unop(Iop_Not8, data)),
                   vbits) );
}

static IRAtom* mkImproveOR16 ( MCEnv* mce, IRAtom* data, IRAtom* vbits )
{
   tl_assert(isOriginalAtom(mce, data));
   tl_assert(isShadowAtom(mce, vbits));
   tl_assert(sameKindedAtoms(data, vbits));
   return assignNew(
             mce, Ity_I16,
             binop(Iop_Or16,
                   assignNew(mce, Ity_I16, unop(Iop_Not16, data)),
                   vbits) );
}

static IRAtom* mkImproveOR32 ( MCEnv* mce, IRAtom* data, IRAtom* vbits )
{
   tl_assert(isOriginalAtom(mce, data));
   tl_assert(isShadowAtom(mce, vbits));
   tl_assert(sameKindedAtoms(data, vbits));
   return assignNew(
             mce, Ity_I32,
             binop(Iop_Or32,
                   assignNew(mce, Ity_I32, unop(Iop_Not32, data)),
                   vbits) );
}

static IRAtom* mkImproveOR64 ( MCEnv* mce, IRAtom* data, IRAtom* vbits )
{
   tl_assert(isOriginalAtom(mce, data));
   tl_assert(isShadowAtom(mce, vbits));
   tl_assert(sameKindedAtoms(data, vbits));
   return assignNew(
             mce, Ity_I64,
             binop(Iop_Or64,
                   assignNew(mce, Ity_I64, unop(Iop_Not64, data)),
                   vbits) );
}

static IRAtom* mkImproveORV128 ( MCEnv* mce, IRAtom* data, IRAtom* vbits )
{
   tl_assert(isOriginalAtom(mce, data));
   tl_assert(isShadowAtom(mce, vbits));
   tl_assert(sameKindedAtoms(data, vbits));
   return assignNew(
             mce, Ity_V128,
             binop(Iop_OrV128,
                   assignNew(mce, Ity_V128, unop(Iop_NotV128, data)),
                   vbits) );
}

/* --------- Pessimising casts. --------- */
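
/* A 'pessimising cast' first collapses the source shadow value to a
   single bit -- 0 only if every bit is defined -- and then widens
   that bit to the destination type, giving all-0s (defined) or
   all-1s (undefined). */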

static IRAtom* mkPCastTo( MCEnv* mce, IRType dst_ty, IRAtom* vbits )
{
   IRType  ty;
   IRAtom* tmp1;
   /* Note, dst_ty is a shadow type, not an original type. */
   /* First of all, collapse vbits down to a single bit. */
   tl_assert(isShadowAtom(mce,vbits));
   ty   = typeOfIRExpr(mce->bb->tyenv, vbits);
   tmp1 = NULL;
   switch (ty) {
      case Ity_I1:
         tmp1 = vbits;
         break;
      case Ity_I8:
         tmp1 = assignNew(mce, Ity_I1, binop(Iop_CmpNE8, vbits, mkU8(0)));
         break;
      case Ity_I16:
         tmp1 = assignNew(mce, Ity_I1, binop(Iop_CmpNE16, vbits, mkU16(0)));
         break;
      case Ity_I32:
         tmp1 = assignNew(mce, Ity_I1, binop(Iop_CmpNE32, vbits, mkU32(0)));
         break;
      case Ity_I64:
         tmp1 = assignNew(mce, Ity_I1, binop(Iop_CmpNE64, vbits, mkU64(0)));
         break;
      default:
         VG_(tool_panic)("mkPCastTo(1)");
   }
   tl_assert(tmp1);
   /* Now widen up to the dst type. */
   switch (dst_ty) {
      case Ity_I1:
         return tmp1;
      case Ity_I8:
         return assignNew(mce, Ity_I8, unop(Iop_1Sto8, tmp1));
      case Ity_I16:
         return assignNew(mce, Ity_I16, unop(Iop_1Sto16, tmp1));
      case Ity_I32:
         return assignNew(mce, Ity_I32, unop(Iop_1Sto32, tmp1));
      case Ity_I64:
         return assignNew(mce, Ity_I64, unop(Iop_1Sto64, tmp1));
      case Ity_V128:
         tmp1 = assignNew(mce, Ity_I64,  unop(Iop_1Sto64, tmp1));
         tmp1 = assignNew(mce, Ity_V128, binop(Iop_64HLtoV128, tmp1, tmp1));
         return tmp1;
      default:
         ppIRType(dst_ty);
         VG_(tool_panic)("mkPCastTo(2)");
   }
}


/*------------------------------------------------------------*/
/*--- Emit a test and complaint if something is undefined. ---*/
/*------------------------------------------------------------*/

/* Set the annotations on a dirty helper to indicate that the stack
   pointer and instruction pointers might be read.  This is the
   behaviour of all 'emit-a-complaint' style functions we might
   call. */

static void setHelperAnns ( MCEnv* mce, IRDirty* di ) {
   di->nFxState = 2;
   di->fxState[0].fx     = Ifx_Read;
   di->fxState[0].offset = mce->layout->offset_SP;
   di->fxState[0].size   = mce->layout->sizeof_SP;
   di->fxState[1].fx     = Ifx_Read;
   di->fxState[1].offset = mce->layout->offset_IP;
   di->fxState[1].size   = mce->layout->sizeof_IP;
}


/* Check the supplied **original** atom for undefinedness, and emit a
   complaint if so.  Once that happens, mark it as defined.  This is
   possible because the atom is either a tmp or literal.  If it's a
   tmp, it will be shadowed by a tmp, and so we can set the shadow to
   be defined.  In fact as mentioned above, we will have to allocate a
   new tmp to carry the new 'defined' shadow value, and update the
   original->tmp mapping accordingly; we cannot simply assign a new
   value to an existing shadow tmp as this breaks SSAness -- resulting
   in the post-instrumentation sanity checker spluttering in disapproval.
*/
static void complainIfUndefined ( MCEnv* mce, IRAtom* atom )
{
   IRAtom*  vatom;
   IRType   ty;
   Int      sz;
   IRDirty* di;
   IRAtom*  cond;

   /* Since the original expression is atomic, there's no duplicated
      work generated by making multiple V-expressions for it.  So we
      don't really care about the possibility that someone else may
      also create a V-interpretation for it. */
   tl_assert(isOriginalAtom(mce, atom));
   vatom = expr2vbits( mce, atom );
   tl_assert(isShadowAtom(mce, vatom));
   tl_assert(sameKindedAtoms(atom, vatom));

   ty = typeOfIRExpr(mce->bb->tyenv, vatom);

   /* sz is only used for constructing the error message */
   sz = ty==Ity_I1 ? 0 : sizeofIRType(ty);

   cond = mkPCastTo( mce, Ity_I1, vatom );
   /* cond will be 0 if all defined, and 1 if any not defined. */

   switch (sz) {
      case 0:
         di = unsafeIRDirty_0_N( 0/*regparms*/,
                                 "MC_(helperc_value_check0_fail)",
                                 &MC_(helperc_value_check0_fail),
                                 mkIRExprVec_0()
                               );
         break;
      case 1:
         di = unsafeIRDirty_0_N( 0/*regparms*/,
                                 "MC_(helperc_value_check1_fail)",
                                 &MC_(helperc_value_check1_fail),
                                 mkIRExprVec_0()
                               );
         break;
      case 4:
         di = unsafeIRDirty_0_N( 0/*regparms*/,
                                 "MC_(helperc_value_check4_fail)",
                                 &MC_(helperc_value_check4_fail),
                                 mkIRExprVec_0()
                               );
         break;
      default:
         di = unsafeIRDirty_0_N( 1/*regparms*/,
                                 "MC_(helperc_complain_undef)",
                                 &MC_(helperc_complain_undef),
                                 mkIRExprVec_1( mkIRExpr_HWord( sz ))
                               );
         break;
   }
   di->guard = cond;
   setHelperAnns( mce, di );
   stmt( mce->bb, IRStmt_Dirty(di));

   /* Set the shadow tmp to be defined.  First, update the
      orig->shadow tmp mapping to reflect the fact that this shadow is
      getting a new value. */
   tl_assert(isIRAtom(vatom));
   /* sameKindedAtoms ... */
   if (vatom->tag == Iex_RdTmp) {
      tl_assert(atom->tag == Iex_RdTmp);
      newShadowTmp(mce, atom->Iex.RdTmp.tmp);
      assign(mce->bb, findShadowTmp(mce, atom->Iex.RdTmp.tmp),
                      definedOfType(ty));
   }
}


/*------------------------------------------------------------*/
/*--- Shadowing PUTs/GETs, and indexed variants thereof    ---*/
/*------------------------------------------------------------*/

/* Examine the always-defined sections declared in layout to see if
   the (offset,size) section is within one.  Note, it is an error to
   partially fall into such a region: (offset,size) should either be
   completely in such a region or completely not-in such a region.
*/
static Bool isAlwaysDefd ( MCEnv* mce, Int offset, Int size )
{
   Int minoffD, maxoffD, i;
   Int minoff = offset;
   Int maxoff = minoff + size - 1;
   tl_assert((minoff & ~0xFFFF) == 0);
   tl_assert((maxoff & ~0xFFFF) == 0);

   for (i = 0; i < mce->layout->n_alwaysDefd; i++) {
      minoffD = mce->layout->alwaysDefd[i].offset;
      maxoffD = minoffD + mce->layout->alwaysDefd[i].size - 1;
      tl_assert((minoffD & ~0xFFFF) == 0);
      tl_assert((maxoffD & ~0xFFFF) == 0);

      if (maxoff < minoffD || maxoffD < minoff)
         continue; /* no overlap */
      if (minoff >= minoffD && maxoff <= maxoffD)
         return True; /* completely contained in an always-defd section */

      VG_(tool_panic)("memcheck:isAlwaysDefd:partial overlap");
   }
   return False; /* could not find any containing section */
}


/* Generate into bb suitable actions to shadow this Put.  If the state
   slice is marked 'always defined', do nothing.  Otherwise, write the
   supplied V bits to the shadow state.  We can pass in either an
   original atom or a V-atom, but not both.  In the former case the
   relevant V-bits are then generated from the original.
*/
static
void do_shadow_PUT ( MCEnv* mce,  Int offset,
                     IRAtom* atom, IRAtom* vatom )
{
   IRType ty;
   if (atom) {
      tl_assert(!vatom);
      tl_assert(isOriginalAtom(mce, atom));
      vatom = expr2vbits( mce, atom );
   } else {
      tl_assert(vatom);
      tl_assert(isShadowAtom(mce, vatom));
   }

   ty = typeOfIRExpr(mce->bb->tyenv, vatom);
   tl_assert(ty != Ity_I1);
   if (isAlwaysDefd(mce, offset, sizeofIRType(ty))) {
      /* later: no ... */
      /* emit code to emit a complaint if any of the vbits are 1. */
      /* complainIfUndefined(mce, atom); */
   } else {
      /* Do a plain shadow Put. */
      stmt( mce->bb, IRStmt_Put( offset + mce->layout->total_sizeB, vatom ) );
   }
}


/* Generate into bb suitable actions to shadow this PutI.  If the
   state slice is marked 'always defined', do nothing; otherwise,
   write the V bits corresponding to the supplied atom to the shadow
   version of the described state array.
*/
static
void do_shadow_PUTI ( MCEnv* mce,
                      IRRegArray* descr, IRAtom* ix, Int bias, IRAtom* atom )
{
   IRAtom* vatom;
   IRType  ty, tyS;
   Int     arrSize;

   tl_assert(isOriginalAtom(mce,atom));
   vatom = expr2vbits( mce, atom );
   tl_assert(sameKindedAtoms(atom, vatom));
   ty   = descr->elemTy;
   tyS  = shadowType(ty);
   arrSize = descr->nElems * sizeofIRType(ty);
   tl_assert(ty != Ity_I1);
   tl_assert(isOriginalAtom(mce,ix));
   complainIfUndefined(mce,ix);
   if (isAlwaysDefd(mce, descr->base, arrSize)) {
      /* later: no ... */
      /* emit code to emit a complaint if any of the vbits are 1. */
      /* complainIfUndefined(mce, atom); */
   } else {
      /* Do a cloned version of the Put that refers to the shadow
         area. */
      IRRegArray* new_descr
         = mkIRRegArray( descr->base + mce->layout->total_sizeB,
                      tyS, descr->nElems);
      stmt( mce->bb, IRStmt_PutI( mkIRPutI( new_descr, ix, bias, vatom ) ));
   }
}


/* Return an expression which contains the V bits corresponding to
   the given GET (passed in, in pieces).
*/
static
IRExpr* shadow_GET ( MCEnv* mce, Int offset, IRType ty )
{
   IRType tyS = shadowType(ty);
   tl_assert(ty != Ity_I1);
   if (isAlwaysDefd(mce, offset, sizeofIRType(ty))) {
      /* Always defined, return all zeroes of the relevant type */
      return definedOfType(tyS);
   } else {
      /* return a cloned version of the Get that refers to the shadow
         area. */
      return IRExpr_Get( offset + mce->layout->total_sizeB, tyS );
   }
}


/* Return an expression which contains the V bits corresponding to
   the given GETI (passed in, in pieces).
*/
static
IRExpr* shadow_GETI ( MCEnv* mce, IRRegArray* descr, IRAtom* ix, Int bias )
{
   IRType ty   = descr->elemTy;
   IRType tyS  = shadowType(ty);
   Int arrSize = descr->nElems * sizeofIRType(ty);
   tl_assert(ty != Ity_I1);
   tl_assert(isOriginalAtom(mce,ix));
   complainIfUndefined(mce,ix);
   if (isAlwaysDefd(mce, descr->base, arrSize)) {
      /* Always defined, return all zeroes of the relevant type */
      return definedOfType(tyS);
   } else {
      /* return a cloned version of the Get that refers to the shadow
         area. */
      IRRegArray* new_descr
         = mkIRRegArray( descr->base + mce->layout->total_sizeB,
                      tyS, descr->nElems);
      return IRExpr_GetI( new_descr, ix, bias );
   }
}


/*------------------------------------------------------------*/
/*--- Generating approximations for unknown operations,    ---*/
/*--- using lazy-propagate semantics                       ---*/
/*------------------------------------------------------------*/

/* Lazy propagation of undefinedness from two values, resulting in the
   specified shadow type.
*/
static
IRAtom* mkLazy2 ( MCEnv* mce, IRType finalVty, IRAtom* va1, IRAtom* va2 )
{
   /* force everything via 32-bit intermediaries. */
   IRAtom* at;
   tl_assert(isShadowAtom(mce,va1));
   tl_assert(isShadowAtom(mce,va2));
   at = mkPCastTo(mce, Ity_I32, va1);
   at = mkUifU(mce, Ity_I32, at, mkPCastTo(mce, Ity_I32, va2));
   at = mkPCastTo(mce, finalVty, at);
   return at;
}


/* Do the lazy propagation game from a null-terminated vector of
   atoms.  This is presumably the arguments to a helper call, so the
   IRCallee info is also supplied in order that we can know which
   arguments should be ignored (via the .mcx_mask field).
*/
static
IRAtom* mkLazyN ( MCEnv* mce,
                  IRAtom** exprvec, IRType finalVtype, IRCallee* cee )
{
   Int i;
   IRAtom* here;
   IRAtom* curr = definedOfType(Ity_I32);
   for (i = 0; exprvec[i]; i++) {
      tl_assert(i < 32);
      tl_assert(isOriginalAtom(mce, exprvec[i]));
      /* Only take notice of this arg if the callee's mc-exclusion
         mask does not say it is to be excluded. */
      if (cee->mcx_mask & (1<<i)) {
         /* the arg is to be excluded from definedness checking.  Do
            nothing. */
         if (0) VG_(printf)("excluding %s(%d)\n", cee->name, i);
      } else {
         /* calculate the arg's definedness, and pessimistically merge
            it in. */
         here = mkPCastTo( mce, Ity_I32, expr2vbits(mce, exprvec[i]) );
         curr = mkUifU32(mce, here, curr);
      }
   }
   return mkPCastTo(mce, finalVtype, curr );
}


/*------------------------------------------------------------*/
/*--- Generating expensive sequences for exact carry-chain ---*/
/*--- propagation in add/sub and related operations.       ---*/
/*------------------------------------------------------------*/
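
/* Rationale (a sketch): a_min treats every undefined bit of an
   operand as 0 and a_max treats it as 1, so a_min..a_max brackets
   every value the operand could actually take.  Where the bits of
   a_min+b_min and a_max+b_max agree, the scheme treats the result
   bit as independent of the undefined inputs, hence defined;
   qaa|qbb is OR'd in because directly undefined input bits always
   yield undefined result bits. */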

static
__attribute__((unused))
IRAtom* expensiveAdd32 ( MCEnv* mce, IRAtom* qaa, IRAtom* qbb,
                                     IRAtom* aa,  IRAtom* bb )
{
   IRAtom *a_min, *b_min, *a_max, *b_max;
   IRType ty;
   IROp   opAND, opOR, opXOR, opNOT, opADD;

   tl_assert(isShadowAtom(mce,qaa));
   tl_assert(isShadowAtom(mce,qbb));
   tl_assert(isOriginalAtom(mce,aa));
   tl_assert(isOriginalAtom(mce,bb));
   tl_assert(sameKindedAtoms(qaa,aa));
   tl_assert(sameKindedAtoms(qbb,bb));

   ty    = Ity_I32;
   opAND = Iop_And32;
   opOR  = Iop_Or32;
   opXOR = Iop_Xor32;
   opNOT = Iop_Not32;
   opADD = Iop_Add32;

   // a_min = aa & ~qaa
   a_min = assignNew(mce,ty,
                     binop(opAND, aa,
                                  assignNew(mce,ty, unop(opNOT, qaa))));

   // b_min = bb & ~qbb
   b_min = assignNew(mce,ty,
                     binop(opAND, bb,
                                  assignNew(mce,ty, unop(opNOT, qbb))));

   // a_max = aa | qaa
   a_max = assignNew(mce,ty, binop(opOR, aa, qaa));

   // b_max = bb | qbb
   b_max = assignNew(mce,ty, binop(opOR, bb, qbb));

   // result = (qaa | qbb) | ((a_min + b_min) ^ (a_max + b_max))
   return
   assignNew(mce,ty,
      binop( opOR,
             assignNew(mce,ty, binop(opOR, qaa, qbb)),
             assignNew(mce,ty,
                binop(opXOR, assignNew(mce,ty, binop(opADD, a_min, b_min)),
                             assignNew(mce,ty, binop(opADD, a_max, b_max))
                )
             )
      )
   );
}


/*------------------------------------------------------------*/
/*--- Helpers for dealing with vector primops.            ---*/
/*------------------------------------------------------------*/

/* Vector pessimisation -- pessimise within each lane individually. */
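
/* Iop_CmpNEZ<W>x<N> writes all-1s into each lane that is nonzero --
   i.e. each lane containing at least one undefined bit -- and all-0s
   otherwise: in effect, a per-lane pessimising cast. */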

static IRAtom* mkPCast8x16 ( MCEnv* mce, IRAtom* at )
{
   return assignNew(mce, Ity_V128, unop(Iop_CmpNEZ8x16, at));
}

static IRAtom* mkPCast16x8 ( MCEnv* mce, IRAtom* at )
{
   return assignNew(mce, Ity_V128, unop(Iop_CmpNEZ16x8, at));
}

static IRAtom* mkPCast32x4 ( MCEnv* mce, IRAtom* at )
{
   return assignNew(mce, Ity_V128, unop(Iop_CmpNEZ32x4, at));
}

static IRAtom* mkPCast64x2 ( MCEnv* mce, IRAtom* at )
{
   return assignNew(mce, Ity_V128, unop(Iop_CmpNEZ64x2, at));
}


/* Here's a simple scheme capable of handling ops derived from SSE1
   code, while only generating ops that can be efficiently implemented
   in SSE1. */

/* All-lanes versions are straightforward:

   binary32Fx4(x,y)   ==> PCast32x4(UifUV128(x#,y#))

   unary32Fx4(x)      ==> PCast32x4(x#)

   Lowest-lane-only versions are more complex:

   binary32F0x4(x,y)  ==> SetV128lo32(
                             x#,
                             PCast32(V128to32(UifUV128(x#,y#)))
                          )

   This is perhaps not so obvious.  In particular, it's faster to
   do a V128-bit UifU and then take the bottom 32 bits than the more
   obvious scheme of taking the bottom 32 bits of each operand
   and doing a 32-bit UifU.  Basically since UifU is fast and
   chopping lanes off vector values is slow.

   Finally:

   unary32F0x4(x)     ==> SetV128lo32(
                             x#,
                             PCast32(V128to32(x#))
                          )

   Where:

   PCast32(v#)   = 1Sto32(CmpNE32(v#,0))
   PCast32x4(v#) = CmpNEZ32x4(v#)
*/
1434
1435static
1436IRAtom* binary32Fx4 ( MCEnv* mce, IRAtom* vatomX, IRAtom* vatomY )
1437{
1438   IRAtom* at;
1439   tl_assert(isShadowAtom(mce, vatomX));
1440   tl_assert(isShadowAtom(mce, vatomY));
1441   at = mkUifUV128(mce, vatomX, vatomY);
1442   at = assignNew(mce, Ity_V128, mkPCast32x4(mce, at));
1443   return at;
1444}
1445
1446static
1447IRAtom* unary32Fx4 ( MCEnv* mce, IRAtom* vatomX )
1448{
1449   IRAtom* at;
1450   tl_assert(isShadowAtom(mce, vatomX));
1451   at = assignNew(mce, Ity_V128, mkPCast32x4(mce, vatomX));
1452   return at;
1453}
1454
1455static
1456IRAtom* binary32F0x4 ( MCEnv* mce, IRAtom* vatomX, IRAtom* vatomY )
1457{
1458   IRAtom* at;
1459   tl_assert(isShadowAtom(mce, vatomX));
1460   tl_assert(isShadowAtom(mce, vatomY));
1461   at = mkUifUV128(mce, vatomX, vatomY);
1462   at = assignNew(mce, Ity_I32, unop(Iop_V128to32, at));
1463   at = mkPCastTo(mce, Ity_I32, at);
1464   at = assignNew(mce, Ity_V128, binop(Iop_SetV128lo32, vatomX, at));
1465   return at;
1466}
1467
1468static
1469IRAtom* unary32F0x4 ( MCEnv* mce, IRAtom* vatomX )
1470{
1471   IRAtom* at;
1472   tl_assert(isShadowAtom(mce, vatomX));
1473   at = assignNew(mce, Ity_I32, unop(Iop_V128to32, vatomX));
1474   at = mkPCastTo(mce, Ity_I32, at);
1475   at = assignNew(mce, Ity_V128, binop(Iop_SetV128lo32, vatomX, at));
1476   return at;
1477}
1478
1479/* --- ... and ... 64Fx2 versions of the same ... --- */
1480
1481static
1482IRAtom* binary64Fx2 ( MCEnv* mce, IRAtom* vatomX, IRAtom* vatomY )
1483{
1484   IRAtom* at;
1485   tl_assert(isShadowAtom(mce, vatomX));
1486   tl_assert(isShadowAtom(mce, vatomY));
1487   at = mkUifUV128(mce, vatomX, vatomY);
1488   at = assignNew(mce, Ity_V128, mkPCast64x2(mce, at));
1489   return at;
1490}
1491
1492static
1493IRAtom* unary64Fx2 ( MCEnv* mce, IRAtom* vatomX )
1494{
1495   IRAtom* at;
1496   tl_assert(isShadowAtom(mce, vatomX));
1497   at = assignNew(mce, Ity_V128, mkPCast64x2(mce, vatomX));
1498   return at;
1499}
1500
1501static
1502IRAtom* binary64F0x2 ( MCEnv* mce, IRAtom* vatomX, IRAtom* vatomY )
1503{
1504   IRAtom* at;
1505   tl_assert(isShadowAtom(mce, vatomX));
1506   tl_assert(isShadowAtom(mce, vatomY));
1507   at = mkUifUV128(mce, vatomX, vatomY);
1508   at = assignNew(mce, Ity_I64, unop(Iop_V128to64, at));
1509   at = mkPCastTo(mce, Ity_I64, at);
1510   at = assignNew(mce, Ity_V128, binop(Iop_SetV128lo64, vatomX, at));
1511   return at;
1512}
1513
1514static
1515IRAtom* unary64F0x2 ( MCEnv* mce, IRAtom* vatomX )
1516{
1517   IRAtom* at;
1518   tl_assert(isShadowAtom(mce, vatomX));
1519   at = assignNew(mce, Ity_I64, unop(Iop_V128to64, vatomX));
1520   at = mkPCastTo(mce, Ity_I64, at);
1521   at = assignNew(mce, Ity_V128, binop(Iop_SetV128lo64, vatomX, at));
1522   return at;
1523}
1524
1525/* --- --- Vector saturated narrowing --- --- */
1526
/* This is quite subtle.  What to do is simple:

   Let the original narrowing op be QNarrowW{S,U}xN.  Produce:

      the-narrowing-op( PCastWxN(vatom1), PCastWxN(vatom2) )

   Why this is right is less obvious.  Consider a lane in either of
   the args, vatom1 or vatom2; it doesn't matter which.

   After the PCast, that lane is either all 0s (defined) or all 1s
   (undefined).

   Both signed and unsigned saturating narrowing of all 0s produces
   all 0s, which is what we want.

   The all-1s case is more complex.  Unsigned narrowing interprets an
   all-1s input as the largest unsigned integer, and so produces all
   1s as a result, since that is the largest unsigned value at the
   smaller width.

   Signed narrowing interprets all 1s as -1.  Fortunately, -1 narrows
   to -1, so we still wind up with all 1s at the smaller width.

   In short: pessimise the args, then apply the original narrowing
   op.
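
   Concretely, for the 16-to-8 case: an undefined 16-bit lane is
   0xFFFF after the PCast.  Signed saturating narrowing sees -1 and
   produces 0xFF; unsigned saturating narrowing sees 65535 and also
   produces 0xFF.  Either way the narrowed lane is all 1s, i.e. still
   marked undefined, as required.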
*/
static
IRAtom* vectorNarrowV128 ( MCEnv* mce, IROp narrow_op,
                           IRAtom* vatom1, IRAtom* vatom2 )
{
   IRAtom *at1, *at2, *at3;
   IRAtom* (*pcast)( MCEnv*, IRAtom* );
   switch (narrow_op) {
      case Iop_QNarrowBin32Sto16Sx8: pcast = mkPCast32x4; break;
      case Iop_QNarrowBin16Sto8Sx16: pcast = mkPCast16x8; break;
      case Iop_QNarrowBin16Sto8Ux16: pcast = mkPCast16x8; break;
      default: VG_(tool_panic)("vectorNarrowV128");
   }
   tl_assert(isShadowAtom(mce,vatom1));
   tl_assert(isShadowAtom(mce,vatom2));
   at1 = assignNew(mce, Ity_V128, pcast(mce, vatom1));
   at2 = assignNew(mce, Ity_V128, pcast(mce, vatom2));
   at3 = assignNew(mce, Ity_V128, binop(narrow_op, at1, at2));
   return at3;
}


/* --- --- Vector integer arithmetic --- --- */

/* Simple ... UifU the args and per-lane pessimise the results. */
static
IRAtom* binary8Ix16 ( MCEnv* mce, IRAtom* vatom1, IRAtom* vatom2 )
{
   IRAtom* at;
   at = mkUifUV128(mce, vatom1, vatom2);
   at = mkPCast8x16(mce, at);
   return at;
}

static
IRAtom* binary16Ix8 ( MCEnv* mce, IRAtom* vatom1, IRAtom* vatom2 )
{
   IRAtom* at;
   at = mkUifUV128(mce, vatom1, vatom2);
   at = mkPCast16x8(mce, at);
   return at;
}

static
IRAtom* binary32Ix4 ( MCEnv* mce, IRAtom* vatom1, IRAtom* vatom2 )
{
   IRAtom* at;
   at = mkUifUV128(mce, vatom1, vatom2);
   at = mkPCast32x4(mce, at);
   return at;
}

static
IRAtom* binary64Ix2 ( MCEnv* mce, IRAtom* vatom1, IRAtom* vatom2 )
{
   IRAtom* at;
   at = mkUifUV128(mce, vatom1, vatom2);
   at = mkPCast64x2(mce, at);
   return at;
}


/*------------------------------------------------------------*/
/*--- Generate shadow values from all kinds of IRExprs.    ---*/
/*------------------------------------------------------------*/

static
IRAtom* expr2vbits_Binop ( MCEnv* mce,
                           IROp op,
                           IRAtom* atom1, IRAtom* atom2 )
{
   IRType  and_or_ty;
   IRAtom* (*uifu)    (MCEnv*, IRAtom*, IRAtom*);
   IRAtom* (*difd)    (MCEnv*, IRAtom*, IRAtom*);
   IRAtom* (*improve) (MCEnv*, IRAtom*, IRAtom*);

   IRAtom* vatom1 = expr2vbits( mce, atom1 );
   IRAtom* vatom2 = expr2vbits( mce, atom2 );

   tl_assert(isOriginalAtom(mce,atom1));
   tl_assert(isOriginalAtom(mce,atom2));
   tl_assert(isShadowAtom(mce,vatom1));
   tl_assert(isShadowAtom(mce,vatom2));
   tl_assert(sameKindedAtoms(atom1,vatom1));
   tl_assert(sameKindedAtoms(atom2,vatom2));
   switch (op) {

      /* V128-bit SIMD (SSE2-esque) */

      case Iop_ShrN16x8:
      case Iop_ShrN32x4:
      case Iop_ShrN64x2:
      case Iop_SarN16x8:
      case Iop_SarN32x4:
      case Iop_ShlN16x8:
      case Iop_ShlN32x4:
      case Iop_ShlN64x2:
         /* Same scheme as with all other shifts. */
         complainIfUndefined(mce, atom2);
         return assignNew(mce, Ity_V128, binop(op, vatom1, atom2));

      case Iop_QSub8Ux16:
      case Iop_QSub8Sx16:
      case Iop_Sub8x16:
      case Iop_Min8Ux16:
      case Iop_Max8Ux16:
      case Iop_CmpGT8Sx16:
      case Iop_CmpEQ8x16:
      case Iop_Avg8Ux16:
      case Iop_QAdd8Ux16:
      case Iop_QAdd8Sx16:
      case Iop_Add8x16:
         return binary8Ix16(mce, vatom1, vatom2);

      case Iop_QSub16Ux8:
      case Iop_QSub16Sx8:
      case Iop_Sub16x8:
      case Iop_Mul16x8:
      case Iop_MulHi16Sx8:
      case Iop_MulHi16Ux8:
      case Iop_Min16Sx8:
      case Iop_Max16Sx8:
      case Iop_CmpGT16Sx8:
      case Iop_CmpEQ16x8:
      case Iop_Avg16Ux8:
      case Iop_QAdd16Ux8:
      case Iop_QAdd16Sx8:
      case Iop_Add16x8:
         return binary16Ix8(mce, vatom1, vatom2);

      case Iop_Sub32x4:
      case Iop_QSub32Sx4:
      case Iop_QSub32Ux4:
      case Iop_CmpGT32Sx4:
      case Iop_CmpEQ32x4:
      case Iop_Add32x4:
      case Iop_QAdd32Ux4:
      case Iop_QAdd32Sx4:
         return binary32Ix4(mce, vatom1, vatom2);

      case Iop_Sub64x2:
      case Iop_QSub64Ux2:
      case Iop_QSub64Sx2:
      case Iop_Add64x2:
      case Iop_QAdd64Ux2:
      case Iop_QAdd64Sx2:
         return binary64Ix2(mce, vatom1, vatom2);

      case Iop_QNarrowBin32Sto16Sx8:
      case Iop_QNarrowBin16Sto8Sx16:
      case Iop_QNarrowBin16Sto8Ux16:
         return vectorNarrowV128(mce, op, vatom1, vatom2);

      case Iop_Sub64Fx2:
      case Iop_Mul64Fx2:
      case Iop_Min64Fx2:
      case Iop_Max64Fx2:
      case Iop_Div64Fx2:
      case Iop_CmpLT64Fx2:
      case Iop_CmpLE64Fx2:
      case Iop_CmpEQ64Fx2:
      case Iop_Add64Fx2:
         return binary64Fx2(mce, vatom1, vatom2);

      case Iop_Sub64F0x2:
      case Iop_Mul64F0x2:
      case Iop_Min64F0x2:
      case Iop_Max64F0x2:
      case Iop_Div64F0x2:
      case Iop_CmpLT64F0x2:
      case Iop_CmpLE64F0x2:
      case Iop_CmpEQ64F0x2:
      case Iop_Add64F0x2:
         return binary64F0x2(mce, vatom1, vatom2);

      /* V128-bit SIMD (SSE1-esque) */

      case Iop_Sub32Fx4:
      case Iop_Mul32Fx4:
      case Iop_Min32Fx4:
      case Iop_Max32Fx4:
      case Iop_Div32Fx4:
      case Iop_CmpLT32Fx4:
      case Iop_CmpLE32Fx4:
      case Iop_CmpEQ32Fx4:
      case Iop_Add32Fx4:
         return binary32Fx4(mce, vatom1, vatom2);

      case Iop_Sub32F0x4:
      case Iop_Mul32F0x4:
      case Iop_Min32F0x4:
      case Iop_Max32F0x4:
      case Iop_Div32F0x4:
      case Iop_CmpLT32F0x4:
      case Iop_CmpLE32F0x4:
      case Iop_CmpEQ32F0x4:
      case Iop_Add32F0x4:
         return binary32F0x4(mce, vatom1, vatom2);

      /* V128-bit data-steering */
      case Iop_SetV128lo32:
      case Iop_SetV128lo64:
      case Iop_64HLtoV128:
      case Iop_InterleaveLO64x2:
      case Iop_InterleaveLO32x4:
      case Iop_InterleaveLO16x8:
      case Iop_InterleaveLO8x16:
      case Iop_InterleaveHI64x2:
      case Iop_InterleaveHI32x4:
      case Iop_InterleaveHI16x8:
      case Iop_InterleaveHI8x16:
         return assignNew(mce, Ity_V128, binop(op, vatom1, vatom2));

      /* Scalar floating point */

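      /* These all go via mkLazy2, which (pessimistically) UifUs the
         two args' V bits and PCasts the result to the stated type:
         the result is regarded as defined only if both args are
         fully defined. */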
      // case Iop_RoundF64:
      case Iop_F64toI64S:
      case Iop_I64StoF64:
         /* First arg is I32 (rounding mode), second is F64 or I64
            (data). */
         return mkLazy2(mce, Ity_I64, vatom1, vatom2);

      case Iop_PRemC3210F64: case Iop_PRem1C3210F64:
         /* Takes two F64 args. */
      case Iop_F64toI32S:
      case Iop_F64toF32:
         /* First arg is I32 (rounding mode), second is F64 (data). */
         return mkLazy2(mce, Ity_I32, vatom1, vatom2);

      case Iop_F64toI16S:
         /* First arg is I32 (rounding mode), second is F64 (data). */
         return mkLazy2(mce, Ity_I16, vatom1, vatom2);

      case Iop_ScaleF64:
      case Iop_Yl2xF64:
      case Iop_Yl2xp1F64:
      case Iop_PRemF64:
      case Iop_AtanF64:
      case Iop_AddF64:
      case Iop_DivF64:
      case Iop_SubF64:
      case Iop_MulF64:
         return mkLazy2(mce, Ity_I64, vatom1, vatom2);

      case Iop_CmpF64:
         return mkLazy2(mce, Ity_I32, vatom1, vatom2);

      /* non-FP after here */

      case Iop_DivModU64to32:
      case Iop_DivModS64to32:
         return mkLazy2(mce, Ity_I64, vatom1, vatom2);

      case Iop_16HLto32:
         return assignNew(mce, Ity_I32, binop(op, vatom1, vatom2));
      case Iop_32HLto64:
         return assignNew(mce, Ity_I64, binop(op, vatom1, vatom2));

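      /* Widening multiplies: the low half is the left-pessimised
         UifU of the args' V bits; the high half is a pessimised
         copy of that, since undefinedness anywhere in the inputs
         can affect the entire double-length result. */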
      case Iop_MullS32:
      case Iop_MullU32: {
         IRAtom* vLo32 = mkLeft32(mce, mkUifU32(mce, vatom1,vatom2));
         IRAtom* vHi32 = mkPCastTo(mce, Ity_I32, vLo32);
         return assignNew(mce, Ity_I64, binop(Iop_32HLto64, vHi32, vLo32));
      }

      case Iop_MullS16:
      case Iop_MullU16: {
         IRAtom* vLo16 = mkLeft16(mce, mkUifU16(mce, vatom1,vatom2));
         IRAtom* vHi16 = mkPCastTo(mce, Ity_I16, vLo16);
         return assignNew(mce, Ity_I32, binop(Iop_16HLto32, vHi16, vLo16));
      }

      case Iop_MullS8:
      case Iop_MullU8: {
         IRAtom* vLo8 = mkLeft8(mce, mkUifU8(mce, vatom1,vatom2));
         IRAtom* vHi8 = mkPCastTo(mce, Ity_I8, vLo8);
         return assignNew(mce, Ity_I16, binop(Iop_8HLto16, vHi8, vLo8));
      }

      case Iop_Add32:
#        if 0
         return expensiveAdd32(mce, vatom1,vatom2, atom1,atom2);
#        endif
      case Iop_Sub32:
      case Iop_Mul32:
         return mkLeft32(mce, mkUifU32(mce, vatom1,vatom2));

      case Iop_Mul16:
      case Iop_Add16:
      case Iop_Sub16:
         return mkLeft16(mce, mkUifU16(mce, vatom1,vatom2));

      case Iop_Sub8:
      case Iop_Add8:
         return mkLeft8(mce, mkUifU8(mce, vatom1,vatom2));

      case Iop_CmpLE32S: case Iop_CmpLE32U:
      case Iop_CmpLT32U: case Iop_CmpLT32S:
      case Iop_CmpEQ32: case Iop_CmpNE32:
         return mkPCastTo(mce, Ity_I1, mkUifU32(mce, vatom1,vatom2));

      case Iop_CmpEQ16: case Iop_CmpNE16:
         return mkPCastTo(mce, Ity_I1, mkUifU16(mce, vatom1,vatom2));

      case Iop_CmpEQ8: case Iop_CmpNE8:
         return mkPCastTo(mce, Ity_I1, mkUifU8(mce, vatom1,vatom2));

      case Iop_Shl32: case Iop_Shr32: case Iop_Sar32:
         /* Complain if the shift amount is undefined.  Then simply
            shift the first arg's V bits by the real shift amount. */
         complainIfUndefined(mce, atom2);
         return assignNew(mce, Ity_I32, binop(op, vatom1, atom2));

      case Iop_Shl16: case Iop_Shr16: case Iop_Sar16:
         /* Same scheme as with 32-bit shifts. */
         complainIfUndefined(mce, atom2);
         return assignNew(mce, Ity_I16, binop(op, vatom1, atom2));

      case Iop_Shl8: case Iop_Shr8:
         /* Same scheme as with 32-bit shifts. */
         complainIfUndefined(mce, atom2);
         return assignNew(mce, Ity_I8, binop(op, vatom1, atom2));

      case Iop_Shl64: case Iop_Shr64:
         /* Same scheme as with 32-bit shifts. */
         complainIfUndefined(mce, atom2);
         return assignNew(mce, Ity_I64, binop(op, vatom1, atom2));

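      /* And/Or need special care: for AND, a lane that is defined
         and 0 in either arg forces a defined 0 in the result,
         whatever the other arg's definedness; dually, OR is forced
         by defined 1s.  So do_And_Or below takes the pessimistic
         UifU of the shadows and then restores ("improves")
         definedness on exactly those lanes, using DifD and the
         per-arg improvement terms. */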
      case Iop_AndV128:
         uifu = mkUifUV128; difd = mkDifDV128;
         and_or_ty = Ity_V128; improve = mkImproveANDV128; goto do_And_Or;
      case Iop_And64:
         uifu = mkUifU64; difd = mkDifD64;
         and_or_ty = Ity_I64; improve = mkImproveAND64; goto do_And_Or;
      case Iop_And32:
         uifu = mkUifU32; difd = mkDifD32;
         and_or_ty = Ity_I32; improve = mkImproveAND32; goto do_And_Or;
      case Iop_And16:
         uifu = mkUifU16; difd = mkDifD16;
         and_or_ty = Ity_I16; improve = mkImproveAND16; goto do_And_Or;
      case Iop_And8:
         uifu = mkUifU8; difd = mkDifD8;
         and_or_ty = Ity_I8; improve = mkImproveAND8; goto do_And_Or;

      case Iop_OrV128:
         uifu = mkUifUV128; difd = mkDifDV128;
         and_or_ty = Ity_V128; improve = mkImproveORV128; goto do_And_Or;
      case Iop_Or64:
         uifu = mkUifU64; difd = mkDifD64;
         and_or_ty = Ity_I64; improve = mkImproveOR64; goto do_And_Or;
      case Iop_Or32:
         uifu = mkUifU32; difd = mkDifD32;
         and_or_ty = Ity_I32; improve = mkImproveOR32; goto do_And_Or;
      case Iop_Or16:
         uifu = mkUifU16; difd = mkDifD16;
         and_or_ty = Ity_I16; improve = mkImproveOR16; goto do_And_Or;
      case Iop_Or8:
         uifu = mkUifU8; difd = mkDifD8;
         and_or_ty = Ity_I8; improve = mkImproveOR8; goto do_And_Or;

      do_And_Or:
         return
         assignNew(
            mce,
            and_or_ty,
            difd(mce, uifu(mce, vatom1, vatom2),
                      difd(mce, improve(mce, atom1, vatom1),
                                improve(mce, atom2, vatom2) ) ) );

      case Iop_Xor8:
         return mkUifU8(mce, vatom1, vatom2);
      case Iop_Xor16:
         return mkUifU16(mce, vatom1, vatom2);
      case Iop_Xor32:
         return mkUifU32(mce, vatom1, vatom2);
      case Iop_Xor64:
         return mkUifU64(mce, vatom1, vatom2);
      case Iop_XorV128:
         return mkUifUV128(mce, vatom1, vatom2);

      default:
         ppIROp(op);
         VG_(tool_panic)("memcheck:expr2vbits_Binop");
   }
}


static
IRExpr* expr2vbits_Unop ( MCEnv* mce, IROp op, IRAtom* atom )
{
   IRAtom* vatom = expr2vbits( mce, atom );
   tl_assert(isOriginalAtom(mce,atom));
   switch (op) {

      case Iop_Sqrt64Fx2:
         return unary64Fx2(mce, vatom);

      case Iop_Sqrt64F0x2:
         return unary64F0x2(mce, vatom);

      case Iop_Sqrt32Fx4:
      case Iop_RSqrt32Fx4:
      case Iop_Recip32Fx4:
         return unary32Fx4(mce, vatom);

      case Iop_Sqrt32F0x4:
      case Iop_RSqrt32F0x4:
      case Iop_Recip32F0x4:
         return unary32F0x4(mce, vatom);

      case Iop_32UtoV128:
      case Iop_64UtoV128:
         return assignNew(mce, Ity_V128, unop(op, vatom));

      case Iop_F32toF64:
      case Iop_I32StoF64:
      case Iop_NegF64:
      case Iop_SinF64:
      case Iop_CosF64:
      case Iop_TanF64:
      case Iop_SqrtF64:
      case Iop_AbsF64:
      case Iop_2xm1F64:
         return mkPCastTo(mce, Ity_I64, vatom);

      case Iop_Clz32:
      case Iop_Ctz32:
         return mkPCastTo(mce, Ity_I32, vatom);

      case Iop_32Sto64:
      case Iop_32Uto64:
      case Iop_V128to64:
      case Iop_V128HIto64:
         return assignNew(mce, Ity_I64, unop(op, vatom));

      case Iop_64to32:
      case Iop_64HIto32:
      case Iop_1Uto32:
      case Iop_8Uto32:
      case Iop_16Uto32:
      case Iop_16Sto32:
      case Iop_8Sto32:
         return assignNew(mce, Ity_I32, unop(op, vatom));

      case Iop_8Sto16:
      case Iop_8Uto16:
      case Iop_32to16:
      case Iop_32HIto16:
         return assignNew(mce, Ity_I16, unop(op, vatom));

      case Iop_1Uto8:
      case Iop_16to8:
      case Iop_32to8:
         return assignNew(mce, Ity_I8, unop(op, vatom));

      case Iop_32to1:
         return assignNew(mce, Ity_I1, unop(Iop_32to1, vatom));

      case Iop_ReinterpF64asI64:
      case Iop_ReinterpI64asF64:
      case Iop_ReinterpI32asF32:
      case Iop_NotV128:
      case Iop_Not64:
      case Iop_Not32:
      case Iop_Not16:
      case Iop_Not8:
      case Iop_Not1:
         return vatom;

      default:
         ppIROp(op);
         VG_(tool_panic)("memcheck:expr2vbits_Unop");
   }
}


/* Worker function; do not call directly. */
static
IRAtom* expr2vbits_LDle_WRK ( MCEnv* mce, IRType ty, IRAtom* addr, UInt bias )
{
   void*    helper;
   HChar*   hname;
   IRDirty* di;
   IRTemp   datavbits;
   IRAtom*  addrAct;

   tl_assert(isOriginalAtom(mce,addr));

   /* First, emit a definedness test for the address.  This also sets
      the address (shadow) to 'defined' following the test. */
   complainIfUndefined( mce, addr );

   /* Now cook up a call to the relevant helper function, to read the
      data V bits from shadow memory. */
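   /* Note the helper-name suffix gives the access size in bytes:
      LOADV8 is the 8-byte (Ity_I64) load, LOADV4 the 4-byte one,
      and so on. */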
   ty = shadowType(ty);
   switch (ty) {
      case Ity_I64: helper = &MC_(helperc_LOADV8);
                    hname = "MC_(helperc_LOADV8)";
                    break;
      case Ity_I32: helper = &MC_(helperc_LOADV4);
                    hname = "MC_(helperc_LOADV4)";
                    break;
      case Ity_I16: helper = &MC_(helperc_LOADV2);
                    hname = "MC_(helperc_LOADV2)";
                    break;
      case Ity_I8:  helper = &MC_(helperc_LOADV1);
                    hname = "MC_(helperc_LOADV1)";
                    break;
      default:      ppIRType(ty);
                    VG_(tool_panic)("memcheck:do_shadow_LDle");
   }

   /* Generate the actual address into addrAct. */
   if (bias == 0) {
      addrAct = addr;
   } else {
      IROp    mkAdd;
      IRAtom* eBias;
      IRType  tyAddr  = mce->hWordTy;
      tl_assert( tyAddr == Ity_I32 || tyAddr == Ity_I64 );
      mkAdd   = tyAddr==Ity_I32 ? Iop_Add32 : Iop_Add64;
      eBias   = tyAddr==Ity_I32 ? mkU32(bias) : mkU64(bias);
      addrAct = assignNew(mce, tyAddr, binop(mkAdd, addr, eBias) );
   }

   /* We need to have a place to park the V bits we're just about to
      read. */
   datavbits = newIRTemp(mce->bb->tyenv, ty);
   di = unsafeIRDirty_1_N( datavbits,
                           1/*regparms*/, hname, helper,
                           mkIRExprVec_1( addrAct ));
   setHelperAnns( mce, di );
   stmt( mce->bb, IRStmt_Dirty(di) );

   return mkexpr(datavbits);
}


static
IRAtom* expr2vbits_LDle ( MCEnv* mce, IRType ty, IRAtom* addr, UInt bias )
{
   IRAtom *v64hi, *v64lo;
   switch (shadowType(ty)) {
      case Ity_I8:
      case Ity_I16:
      case Ity_I32:
      case Ity_I64:
         return expr2vbits_LDle_WRK(mce, ty, addr, bias);
      case Ity_V128:
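         /* Split the 128-bit load into two 64-bit loads (low half
            at the given bias, high half 8 bytes beyond) and
            reassemble the halves. */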
         v64lo = expr2vbits_LDle_WRK(mce, Ity_I64, addr, bias);
         v64hi = expr2vbits_LDle_WRK(mce, Ity_I64, addr, bias+8);
         return assignNew( mce,
                           Ity_V128,
                           binop(Iop_64HLtoV128, v64hi, v64lo));
      default:
         VG_(tool_panic)("expr2vbits_LDle");
   }
}


static
IRAtom* expr2vbits_ITE ( MCEnv* mce,
                         IRAtom* cond, IRAtom* iftrue, IRAtom* iffalse )
{
   IRAtom *vbitsC, *vbits0, *vbits1;
   IRType ty;
   /* Given ITE(cond,iftrue,iffalse), generate
         ITE(cond,iftrue#,iffalse#) `UifU` PCast(cond#)
      That is, steer the V bits like the originals, but trash the
      result if the steering value is undefined.  This gives
      lazy propagation. */
   tl_assert(isOriginalAtom(mce, cond));
   tl_assert(isOriginalAtom(mce, iftrue));
   tl_assert(isOriginalAtom(mce, iffalse));

   vbitsC = expr2vbits(mce, cond);
   vbits0 = expr2vbits(mce, iffalse);
   vbits1 = expr2vbits(mce, iftrue);
   ty = typeOfIRExpr(mce->bb->tyenv, vbits0);

   return
      mkUifU(mce, ty, assignNew(mce, ty, IRExpr_ITE(cond, vbits1, vbits0)),
                      mkPCastTo(mce, ty, vbitsC) );
}

/* --------- This is the main expression-handling function. --------- */

static
IRExpr* expr2vbits ( MCEnv* mce, IRExpr* e )
{
   switch (e->tag) {

      case Iex_Get:
         return shadow_GET( mce, e->Iex.Get.offset, e->Iex.Get.ty );

      case Iex_GetI:
         return shadow_GETI( mce, e->Iex.GetI.descr,
                                  e->Iex.GetI.ix, e->Iex.GetI.bias );

      case Iex_RdTmp:
         return IRExpr_RdTmp( findShadowTmp(mce, e->Iex.RdTmp.tmp) );

      case Iex_Const:
         return definedOfType(shadowType(typeOfIRExpr(mce->bb->tyenv, e)));

      case Iex_Binop:
         return expr2vbits_Binop(
                   mce,
                   e->Iex.Binop.op,
                   e->Iex.Binop.arg1, e->Iex.Binop.arg2
                );

      case Iex_Unop:
         return expr2vbits_Unop( mce, e->Iex.Unop.op, e->Iex.Unop.arg );

      case Iex_Load:
         return expr2vbits_LDle( mce, e->Iex.Load.ty,
                                      e->Iex.Load.addr, 0/*addr bias*/ );

      case Iex_CCall:
         return mkLazyN( mce, e->Iex.CCall.args,
                              e->Iex.CCall.retty,
                              e->Iex.CCall.cee );

      case Iex_ITE:
         return expr2vbits_ITE( mce, e->Iex.ITE.cond, e->Iex.ITE.iftrue,
                                e->Iex.ITE.iffalse);

      default:
         VG_(printf)("\n");
         ppIRExpr(e);
         VG_(printf)("\n");
         VG_(tool_panic)("memcheck: expr2vbits");
   }
}

/*------------------------------------------------------------*/
/*--- Generate shadow stmts from all kinds of IRStmts.     ---*/
/*------------------------------------------------------------*/

/* Widen a value to the host word size. */

static
IRExpr* zwidenToHostWord ( MCEnv* mce, IRAtom* vatom )
{
   IRType ty, tyH;

   /* vatom is a vbits-value and as such can only have a shadow type. */
   tl_assert(isShadowAtom(mce,vatom));

   ty  = typeOfIRExpr(mce->bb->tyenv, vatom);
   tyH = mce->hWordTy;

   if (tyH == Ity_I32) {
      switch (ty) {
         case Ity_I32: return vatom;
         case Ity_I16: return assignNew(mce, tyH, unop(Iop_16Uto32, vatom));
         case Ity_I8:  return assignNew(mce, tyH, unop(Iop_8Uto32, vatom));
         default:      goto unhandled;
      }
   } else {
      goto unhandled;
   }
  unhandled:
   VG_(printf)("\nty = "); ppIRType(ty); VG_(printf)("\n");
   VG_(tool_panic)("zwidenToHostWord");
}


/* Generate a shadow store.  addr is always the original address
   atom.  The data may be supplied either as an original atom (data)
   or as its V-bits (vdata), but not both. */

static
void do_shadow_STle ( MCEnv* mce,
                      IRAtom* addr, UInt bias,
                      IRAtom* data, IRAtom* vdata )
{
   IROp     mkAdd;
   IRType   ty, tyAddr;
   IRDirty  *di, *diLo64, *diHi64;
   IRAtom   *addrAct, *addrLo64, *addrHi64;
   IRAtom   *vdataLo64, *vdataHi64;
   IRAtom   *eBias, *eBias0, *eBias8;
   void*    helper = NULL;
   HChar*   hname = NULL;

   tyAddr = mce->hWordTy;
   mkAdd  = tyAddr==Ity_I32 ? Iop_Add32 : Iop_Add64;
   tl_assert( tyAddr == Ity_I32 || tyAddr == Ity_I64 );

   di = diLo64 = diHi64 = NULL;
   eBias = eBias0 = eBias8 = NULL;
   addrAct = addrLo64 = addrHi64 = NULL;
   vdataLo64 = vdataHi64 = NULL;

   if (data) {
      tl_assert(!vdata);
      tl_assert(isOriginalAtom(mce, data));
      tl_assert(bias == 0);
      vdata = expr2vbits( mce, data );
   } else {
      tl_assert(vdata);
   }

   tl_assert(isOriginalAtom(mce,addr));
   tl_assert(isShadowAtom(mce,vdata));

   ty = typeOfIRExpr(mce->bb->tyenv, vdata);

   /* First, emit a definedness test for the address.  This also sets
      the address (shadow) to 'defined' following the test. */
   complainIfUndefined( mce, addr );

   /* Now decide which helper function to call to write the data V
      bits into shadow memory. */
   switch (ty) {
      case Ity_V128: /* we'll use the helper twice */
      case Ity_I64: helper = &MC_(helperc_STOREV8);
                    hname = "MC_(helperc_STOREV8)";
                    break;
      case Ity_I32: helper = &MC_(helperc_STOREV4);
                    hname = "MC_(helperc_STOREV4)";
                    break;
      case Ity_I16: helper = &MC_(helperc_STOREV2);
                    hname = "MC_(helperc_STOREV2)";
                    break;
      case Ity_I8:  helper = &MC_(helperc_STOREV1);
                    hname = "MC_(helperc_STOREV1)";
                    break;
      default:      VG_(tool_panic)("memcheck:do_shadow_STle");
   }

   if (ty == Ity_V128) {

      /* V128-bit case */
      /* See comment in next clause re 64-bit regparms */
      eBias0    = tyAddr==Ity_I32 ? mkU32(bias)   : mkU64(bias);
      addrLo64  = assignNew(mce, tyAddr, binop(mkAdd, addr, eBias0) );
      vdataLo64 = assignNew(mce, Ity_I64, unop(Iop_V128to64, vdata));
      diLo64    = unsafeIRDirty_0_N(
                     1/*regparms*/, hname, helper,
                     mkIRExprVec_2( addrLo64, vdataLo64 ));

      eBias8    = tyAddr==Ity_I32 ? mkU32(bias+8) : mkU64(bias+8);
      addrHi64  = assignNew(mce, tyAddr, binop(mkAdd, addr, eBias8) );
      vdataHi64 = assignNew(mce, Ity_I64, unop(Iop_V128HIto64, vdata));
      diHi64    = unsafeIRDirty_0_N(
                     1/*regparms*/, hname, helper,
                     mkIRExprVec_2( addrHi64, vdataHi64 ));

      setHelperAnns( mce, diLo64 );
      setHelperAnns( mce, diHi64 );
      stmt( mce->bb, IRStmt_Dirty(diLo64) );
      stmt( mce->bb, IRStmt_Dirty(diHi64) );

   } else {

      /* 8/16/32/64-bit cases */
      /* Generate the actual address into addrAct. */
      if (bias == 0) {
         addrAct = addr;
      } else {
         eBias   = tyAddr==Ity_I32 ? mkU32(bias) : mkU64(bias);
         addrAct = assignNew(mce, tyAddr, binop(mkAdd, addr, eBias) );
      }

      if (ty == Ity_I64) {
         /* We can't do this with regparm 2 on 32-bit platforms, since
            the back ends aren't clever enough to handle 64-bit
            regparm args.  Therefore be different. */
         di = unsafeIRDirty_0_N(
                 1/*regparms*/, hname, helper,
                 mkIRExprVec_2( addrAct, vdata ));
      } else {
         di = unsafeIRDirty_0_N(
                 2/*regparms*/, hname, helper,
                 mkIRExprVec_2( addrAct,
                                zwidenToHostWord( mce, vdata )));
      }
      setHelperAnns( mce, di );
      stmt( mce->bb, IRStmt_Dirty(di) );
   }

}


/* Do lazy pessimistic propagation through a dirty helper call, by
   looking at the annotations on it.  This is the most complex part of
   Memcheck. */

static IRType szToITy ( Int n )
{
   switch (n) {
      case 1: return Ity_I8;
      case 2: return Ity_I16;
      case 4: return Ity_I32;
      case 8: return Ity_I64;
      default: VG_(tool_panic)("szToITy(memcheck)");
   }
}

static
void do_shadow_Dirty ( MCEnv* mce, IRDirty* d )
{
   Int     i, n, offset, toDo, gSz, gOff;
   IRAtom  *src, *here, *curr;
   IRType  tyAddr, tySrc, tyDst;
   IRTemp  dst;

   /* First check the guard. */
   complainIfUndefined(mce, d->guard);

   /* Now round up all inputs and PCast over them. */
   curr = definedOfType(Ity_I32);

   /* Inputs: unmasked args */
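   /* The callee's mcx_mask has one bit per argument; a set bit
      means that argument is exempt from definedness checking. */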
   for (i = 0; d->args[i]; i++) {
      if (d->cee->mcx_mask & (1<<i)) {
         /* ignore this arg */
      } else {
         here = mkPCastTo( mce, Ity_I32, expr2vbits(mce, d->args[i]) );
         curr = mkUifU32(mce, here, curr);
      }
   }

   /* Inputs: guest state that we read. */
   for (i = 0; i < d->nFxState; i++) {
      tl_assert(d->fxState[i].fx != Ifx_None);
      if (d->fxState[i].fx == Ifx_Write)
         continue;

      /* Ignore any sections marked as 'always defined'. */
      if (isAlwaysDefd(mce, d->fxState[i].offset, d->fxState[i].size )) {
         if (0)
            VG_(printf)("memcheck: Dirty gst: ignored off %d, sz %d\n",
                        d->fxState[i].offset, d->fxState[i].size );
         continue;
      }

      /* This state element is read or modified.  So we need to
         consider it.  If larger than 8 bytes, deal with it in 8-byte
         chunks. */
      gSz  = d->fxState[i].size;
      gOff = d->fxState[i].offset;
      tl_assert(gSz > 0);
      while (True) {
         if (gSz == 0) break;
         n = gSz <= 8 ? gSz : 8;
         /* update 'curr' with UifU of the state slice
            gOff .. gOff+n-1 */
         tySrc = szToITy( n );
         src   = assignNew( mce, tySrc,
                            shadow_GET(mce, gOff, tySrc ) );
         here = mkPCastTo( mce, Ity_I32, src );
         curr = mkUifU32(mce, here, curr);
         gSz -= n;
         gOff += n;
      }

   }

   /* Inputs: memory.  First set up some info needed regardless of
      whether we're doing reads or writes. */
   tyAddr = Ity_INVALID;

   if (d->mFx != Ifx_None) {
      /* Because we may do multiple shadow loads/stores from the same
         base address, it's best to do a single test of its
         definedness right now.  Post-instrumentation optimisation
         should remove all but this test. */
      tl_assert(d->mAddr);
      complainIfUndefined(mce, d->mAddr);

      tyAddr = typeOfIRExpr(mce->bb->tyenv, d->mAddr);
      tl_assert(tyAddr == Ity_I32 || tyAddr == Ity_I64);
      tl_assert(tyAddr == mce->hWordTy); /* not really right */
   }

   /* Deal with memory inputs (reads or modifies) */
   if (d->mFx == Ifx_Read || d->mFx == Ifx_Modify) {
      offset = 0;
      toDo   = d->mSize;
      /* chew off 32-bit chunks */
      while (toDo >= 4) {
         here = mkPCastTo(
                   mce, Ity_I32,
                   expr2vbits_LDle ( mce, Ity_I32,
                                     d->mAddr, d->mSize - toDo )
                );
         curr = mkUifU32(mce, here, curr);
         toDo -= 4;
      }
      /* chew off 16-bit chunks */
      while (toDo >= 2) {
         here = mkPCastTo(
                   mce, Ity_I32,
                   expr2vbits_LDle ( mce, Ity_I16,
                                     d->mAddr, d->mSize - toDo )
                );
         curr = mkUifU32(mce, here, curr);
         toDo -= 2;
      }
      tl_assert(toDo == 0); /* also need to handle 1-byte excess */
   }

   /* Whew!  So curr is a 32-bit V-value summarising pessimistically
      all the inputs to the helper.  Now we need to re-distribute the
      results to all destinations. */

   /* Outputs: the destination temporary, if there is one. */
   if (d->tmp != IRTemp_INVALID) {
      dst   = findShadowTmp(mce, d->tmp);
      tyDst = typeOfIRTemp(mce->bb->tyenv, d->tmp);
      assign( mce->bb, dst, mkPCastTo( mce, tyDst, curr) );
   }

   /* Outputs: guest state that we write or modify. */
   for (i = 0; i < d->nFxState; i++) {
      tl_assert(d->fxState[i].fx != Ifx_None);
      if (d->fxState[i].fx == Ifx_Read)
         continue;
      /* Ignore any sections marked as 'always defined'. */
      if (isAlwaysDefd(mce, d->fxState[i].offset, d->fxState[i].size ))
         continue;
      /* This state element is written or modified.  So we need to
         consider it.  If larger than 8 bytes, deal with it in 8-byte
         chunks. */
      gSz  = d->fxState[i].size;
      gOff = d->fxState[i].offset;
      tl_assert(gSz > 0);
      while (True) {
         if (gSz == 0) break;
         n = gSz <= 8 ? gSz : 8;
         /* Write suitably-casted 'curr' to the state slice
            gOff .. gOff+n-1 */
         tyDst = szToITy( n );
         do_shadow_PUT( mce, gOff,
                             NULL, /* original atom */
                             mkPCastTo( mce, tyDst, curr ) );
         gSz -= n;
         gOff += n;
      }
   }

   /* Outputs: memory that we write or modify. */
   if (d->mFx == Ifx_Write || d->mFx == Ifx_Modify) {
      offset = 0;
      toDo   = d->mSize;
      /* chew off 32-bit chunks */
      while (toDo >= 4) {
         do_shadow_STle( mce, d->mAddr, d->mSize - toDo,
                         NULL, /* original data */
                         mkPCastTo( mce, Ity_I32, curr ) );
         toDo -= 4;
      }
      /* chew off 16-bit chunks */
      while (toDo >= 2) {
         do_shadow_STle( mce, d->mAddr, d->mSize - toDo,
                         NULL, /* original data */
                         mkPCastTo( mce, Ity_I16, curr ) );
         toDo -= 2;
      }
      tl_assert(toDo == 0); /* also need to handle 1-byte excess */
   }

}


/*------------------------------------------------------------*/
/*--- Memcheck main                                        ---*/
/*------------------------------------------------------------*/

static Bool isBogusAtom ( IRAtom* at )
{
   ULong n = 0;
   IRConst* con;
   tl_assert(isIRAtom(at));
   if (at->tag == Iex_RdTmp)
      return False;
   tl_assert(at->tag == Iex_Const);
   con = at->Iex.Const.con;
   switch (con->tag) {
      case Ico_U8:  n = (ULong)con->Ico.U8; break;
      case Ico_U16: n = (ULong)con->Ico.U16; break;
      case Ico_U32: n = (ULong)con->Ico.U32; break;
      case Ico_U64: n = (ULong)con->Ico.U64; break;
      default: ppIRExpr(at); tl_assert(0);
   }
   /* VG_(printf)("%llx\n", n); */
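   /* These constants look like the magic numbers used by
      word-at-a-time string-processing tricks (e.g. optimised
      strlen), which legitimately operate on partially-defined data;
      their presence hints that the block may deserve gentler
      treatment. */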
   return (n == 0xFEFEFEFF
           || n == 0x80808080
           || n == 0x1010101
           || n == 0x1010100);
}

__attribute__((unused))
static Bool checkForBogusLiterals ( /*FLAT*/ IRStmt* st )
{
   Int     i;
   IRExpr* e;
   switch (st->tag) {
      case Ist_WrTmp:
         e = st->Ist.WrTmp.data;
         switch (e->tag) {
            case Iex_Get:
            case Iex_RdTmp:
               return False;
            case Iex_Unop:
               return isBogusAtom(e->Iex.Unop.arg);
            case Iex_Binop:
               return isBogusAtom(e->Iex.Binop.arg1)
                      || isBogusAtom(e->Iex.Binop.arg2);
            case Iex_ITE:
               return isBogusAtom(e->Iex.ITE.cond)
                      || isBogusAtom(e->Iex.ITE.iftrue)
                      || isBogusAtom(e->Iex.ITE.iffalse);
            case Iex_Load:
               return isBogusAtom(e->Iex.Load.addr);
            case Iex_CCall:
               for (i = 0; e->Iex.CCall.args[i]; i++)
                  if (isBogusAtom(e->Iex.CCall.args[i]))
                     return True;
               return False;
            default:
               goto unhandled;
         }
      case Ist_Put:
         return isBogusAtom(st->Ist.Put.data);
      case Ist_Store:
         return isBogusAtom(st->Ist.Store.addr)
                || isBogusAtom(st->Ist.Store.data);
      case Ist_Exit:
         return isBogusAtom(st->Ist.Exit.guard);
      default:
      unhandled:
         ppIRStmt(st);
         VG_(tool_panic)("checkForBogusLiterals");
   }
}

IRSB* mc_instrument ( void* closureV,
                      IRSB* bb_in, VexGuestLayout* layout,
                      VexGuestExtents* vge,
                      IRType gWordTy, IRType hWordTy )
{
   Bool verboze = False;  /* set to True for a verbose trace */

   /* Bool hasBogusLiterals = False; */

   Int i, j, first_stmt;
   IRStmt* st;
   MCEnv mce;

   /* Set up BB */
   IRSB* bb     = emptyIRSB();
   bb->tyenv    = deepCopyIRTypeEnv(bb_in->tyenv);
   bb->next     = deepCopyIRExpr(bb_in->next);
   bb->jumpkind = bb_in->jumpkind;

   /* Set up the running environment.  Only .bb is modified as we go
      along. */
   mce.bb             = bb;
   mce.layout         = layout;
   mce.n_originalTmps = bb->tyenv->types_used;
   mce.hWordTy        = hWordTy;
   mce.tmpMap         = LibVEX_Alloc(mce.n_originalTmps * sizeof(IRTemp));
   for (i = 0; i < mce.n_originalTmps; i++)
      mce.tmpMap[i] = IRTemp_INVALID;
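   /* Shadow temps are created lazily by findShadowTmp as each
      original temp is first encountered, hence the IRTemp_INVALID
      initialisation. */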

   /* Iterate over the stmts. */

   for (i = 0; i < bb_in->stmts_used; i++) {
      st = bb_in->stmts[i];
      if (!st) continue;

      tl_assert(isFlatIRStmt(st));

      /*
      if (!hasBogusLiterals) {
         hasBogusLiterals = checkForBogusLiterals(st);
         if (hasBogusLiterals) {
            VG_(printf)("bogus: ");
            ppIRStmt(st);
            VG_(printf)("\n");
         }
      }
      */
      first_stmt = bb->stmts_used;

      if (verboze) {
         ppIRStmt(st);
         VG_(printf)("\n\n");
      }

      switch (st->tag) {

         case Ist_WrTmp:
            assign( bb, findShadowTmp(&mce, st->Ist.WrTmp.tmp),
                        expr2vbits( &mce, st->Ist.WrTmp.data) );
            break;

         case Ist_Put:
            do_shadow_PUT( &mce,
                           st->Ist.Put.offset,
                           st->Ist.Put.data,
                           NULL /* shadow atom */ );
            break;

         case Ist_PutI:
            do_shadow_PUTI( &mce,
                            st->Ist.PutI.details->descr,
                            st->Ist.PutI.details->ix,
                            st->Ist.PutI.details->bias,
                            st->Ist.PutI.details->data );
            break;

         case Ist_Store:
            do_shadow_STle( &mce, st->Ist.Store.addr, 0/* addr bias */,
                                  st->Ist.Store.data,
                                  NULL /* shadow data */ );
            break;

         case Ist_Exit:
            /* if (!hasBogusLiterals) */
               complainIfUndefined( &mce, st->Ist.Exit.guard );
            break;

         case Ist_Dirty:
            do_shadow_Dirty( &mce, st->Ist.Dirty.details );
            break;

         case Ist_IMark:
         case Ist_NoOp:
            break;

         default:
            VG_(printf)("\n");
            ppIRStmt(st);
            VG_(printf)("\n");
            VG_(tool_panic)("memcheck: unhandled IRStmt");

      } /* switch (st->tag) */

      if (verboze) {
         for (j = first_stmt; j < bb->stmts_used; j++) {
            VG_(printf)("   ");
            ppIRStmt(bb->stmts[j]);
            VG_(printf)("\n");
         }
         VG_(printf)("\n");
      }

      addStmtToIRSB(bb, st);

   }

   /* Now we need to complain if the jump target is undefined. */
   first_stmt = bb->stmts_used;

   if (verboze) {
      VG_(printf)("bb->next = ");
      ppIRExpr(bb->next);
      VG_(printf)("\n\n");
   }

   complainIfUndefined( &mce, bb->next );

   if (verboze) {
      for (j = first_stmt; j < bb->stmts_used; j++) {
         VG_(printf)("   ");
         ppIRStmt(bb->stmts[j]);
         VG_(printf)("\n");
      }
      VG_(printf)("\n");
   }

   return bb;
}
#endif /* UNUSED */

/*--------------------------------------------------------------------*/
/*--- end                                              test_main.c ---*/
/*--------------------------------------------------------------------*/
