
/*---------------------------------------------------------------*/
/*--- begin                                       test_main.c ---*/
/*---------------------------------------------------------------*/

/*
   This file is part of Valgrind, a dynamic binary instrumentation
   framework.

   Copyright (C) 2004-2011 OpenWorks LLP
      info@open-works.net

   This program is free software; you can redistribute it and/or
   modify it under the terms of the GNU General Public License as
   published by the Free Software Foundation; either version 2 of the
   License, or (at your option) any later version.

   This program is distributed in the hope that it will be useful, but
   WITHOUT ANY WARRANTY; without even the implied warranty of
   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
   General Public License for more details.

   You should have received a copy of the GNU General Public License
   along with this program; if not, write to the Free Software
   Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA
   02110-1301, USA.

   The GNU General Public License is contained in the file COPYING.

   Neither the names of the U.S. Department of Energy nor the
   University of California nor the names of its contributors may be
   used to endorse or promote products derived from this software
   without prior written permission.
*/

#include <stdio.h>
#include <stdlib.h>
#include <assert.h>
#include <string.h>

#include "libvex_basictypes.h"
#include "libvex.h"

#include "test_main.h"


/*---------------------------------------------------------------*/
/*--- Test                                                    ---*/
/*---------------------------------------------------------------*/


__attribute__ ((noreturn))
static
void failure_exit ( void )
{
   fprintf(stdout, "VEX did failure_exit.  Bye.\n");
   exit(1);
}

static
void log_bytes ( HChar* bytes, Int nbytes )
{
   fwrite ( bytes, 1, nbytes, stdout );
}

#define N_LINEBUF 10000
static HChar linebuf[N_LINEBUF];

#define N_ORIGBUF 10000
#define N_TRANSBUF 5000

static UChar origbuf[N_ORIGBUF];
static UChar transbuf[N_TRANSBUF];

static Bool verbose = True;

/* Forwards */
#if 1 /* instrumenter forwards (mc_instrument is used below) */
//static IRSB* ac_instrument ( IRSB*, VexGuestLayout*, IRType );
static
IRSB* mc_instrument ( void* closureV,
                      IRSB* bb_in, VexGuestLayout* layout,
                      VexGuestExtents* vge,
                      IRType gWordTy, IRType hWordTy );
#endif

static Bool chase_into_not_ok ( void* opaque, Addr64 dst ) {
   return False;
}
static UInt needs_self_check ( void* opaque, VexGuestExtents* vge ) {
   return 0;
}

int main ( int argc, char** argv )
{
   FILE* f;
   Int i;
   UInt u, sum;
   Addr32 orig_addr;
   Int bb_number, n_bbs_done = 0;
   Int orig_nbytes, trans_used;
   VexTranslateResult tres;
   VexControl vcon;
   VexGuestExtents vge;
   VexArchInfo vai_x86, vai_amd64, vai_ppc32;
   VexAbiInfo vbi;
   VexTranslateArgs vta;

   if (argc != 2) {
      fprintf(stderr, "usage: vex file.org\n");
      exit(1);
   }
   f = fopen(argv[1], "r");
   if (!f) {
      fprintf(stderr, "can't open `%s'\n", argv[1]);
      exit(1);
   }

   /* Run with default params.  However, we can't allow bb chasing
      since that causes the front end to get segfaults when it tries
      to read code outside the initial BB we hand it.  So when calling
      LibVEX_Translate, send in a chase-into predicate that always
      returns False. */
   LibVEX_default_VexControl ( &vcon );
   vcon.iropt_level = 2;
   vcon.guest_max_insns = 50;

   LibVEX_Init ( &failure_exit, &log_bytes,
                 1,  /* debug_paranoia */
                 TEST_VSUPPORT, /* valgrind support */
                 &vcon );


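   /* The .org input is expected to look like this (illustrative
      values only; the bytes are a hypothetical x86 prologue):

         . 1 8048000 5
         . 55 89 e5 5d c3

      First line: bb-number, bb-addr (hex), n-bytes.  Second line:
      that many instruction bytes in hex, one byte per 3 columns. */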
   while (!feof(f)) {

      __attribute__((unused))
      char* unused1 = fgets(linebuf, N_LINEBUF,f);
      if (linebuf[0] == 0) continue;
      if (linebuf[0] != '.') continue;

      if (n_bbs_done == TEST_N_BBS) break;
      n_bbs_done++;

      /* first line is:   . bb-number bb-addr n-bytes */
      assert(3 == sscanf(&linebuf[1], " %d %x %d\n",
                                 & bb_number,
                                 & orig_addr, & orig_nbytes ));
      assert(orig_nbytes >= 1);
      assert(!feof(f));
      __attribute__((unused))
      char* unused2 = fgets(linebuf, N_LINEBUF,f);
      assert(linebuf[0] == '.');

      /* second line is:   . byte byte byte etc */
      if (verbose)
         printf("============ Basic Block %d, Done %d, "
                "Start %x, nbytes %2d ============",
                bb_number, n_bbs_done-1, orig_addr, orig_nbytes);

      assert(orig_nbytes >= 1 && orig_nbytes <= N_ORIGBUF);
      for (i = 0; i < orig_nbytes; i++) {
         assert(1 == sscanf(&linebuf[2 + 3*i], "%x", &u));
         origbuf[i] = (UChar)u;
      }

      /* FIXME: put sensible values into the .hwcaps fields */
      LibVEX_default_VexArchInfo(&vai_x86);
      vai_x86.hwcaps = VEX_HWCAPS_X86_SSE1
                       | VEX_HWCAPS_X86_SSE2 | VEX_HWCAPS_X86_SSE3;

      LibVEX_default_VexArchInfo(&vai_amd64);
      vai_amd64.hwcaps = 0;

      LibVEX_default_VexArchInfo(&vai_ppc32);
      vai_ppc32.hwcaps = 0;
      vai_ppc32.ppc_cache_line_szB = 128;

      LibVEX_default_VexAbiInfo(&vbi);

      /* ----- Set up args for LibVEX_Translate ----- */
#if 0 /* ppc32 -> ppc32 */
      vta.arch_guest     = VexArchPPC32;
      vta.archinfo_guest = vai_ppc32;
      vta.arch_host      = VexArchPPC32;
      vta.archinfo_host  = vai_ppc32;
#endif
#if 0 /* amd64 -> amd64 */
      vta.arch_guest     = VexArchAMD64;
      vta.archinfo_guest = vai_amd64;
      vta.arch_host      = VexArchAMD64;
      vta.archinfo_host  = vai_amd64;
#endif
#if 1 /* x86 -> x86 */
      vta.arch_guest     = VexArchX86;
      vta.archinfo_guest = vai_x86;
      vta.arch_host      = VexArchX86;
      vta.archinfo_host  = vai_x86;
#endif
      vta.abiinfo_both    = vbi;
      vta.guest_bytes     = origbuf;
      vta.guest_bytes_addr = (Addr64)orig_addr;
      vta.callback_opaque = NULL;
      vta.chase_into_ok   = chase_into_not_ok;
      vta.guest_extents   = &vge;
      vta.host_bytes      = transbuf;
      vta.host_bytes_size = N_TRANSBUF;
      vta.host_bytes_used = &trans_used;
#if 0 /* no instrumentation */
      vta.instrument1     = NULL;
      vta.instrument2     = NULL;
#endif
#if 0 /* addrcheck */
      vta.instrument1     = ac_instrument;
      vta.instrument2     = NULL;
#endif
#if 1 /* memcheck */
      vta.instrument1     = mc_instrument;
      vta.instrument2     = NULL;
#endif
      vta.needs_self_check  = needs_self_check;
      vta.preamble_function = NULL;
      vta.traceflags      = TEST_FLAGS;
#if 1 /* x86, amd64 hosts */
      vta.dispatch_unassisted = (void*)0x12345678;
      vta.dispatch_assisted   = (void*)0x12345678;
#else /* ppc32, ppc64 hosts */
      vta.dispatch        = NULL;
#endif

      vta.finaltidy = NULL;

      for (i = 0; i < TEST_N_ITERS; i++)
         tres = LibVEX_Translate ( &vta );

      if (tres.status != VexTransOK)
         printf("\ntres = %d\n", (Int)tres.status);
      assert(tres.status == VexTransOK);
      assert(tres.n_sc_extents == 0);
      assert(vge.n_used == 1);
      assert((UInt)(vge.len[0]) == orig_nbytes);

      sum = 0;
      for (i = 0; i < trans_used; i++)
         sum += (UInt)transbuf[i];
      printf ( " %6.2f ... %u\n",
               (double)trans_used / (double)vge.len[0], sum );
   }

   fclose(f);
   printf("\n");
   LibVEX_ShowAllocStats();

   return 0;
}

//////////////////////////////////////////////////////////////////////
//////////////////////////////////////////////////////////////////////
//////////////////////////////////////////////////////////////////////
//////////////////////////////////////////////////////////////////////
//////////////////////////////////////////////////////////////////////
//////////////////////////////////////////////////////////////////////
//////////////////////////////////////////////////////////////////////
//////////////////////////////////////////////////////////////////////

#if 0 /* UNUSED */

static
__attribute__((noreturn))
void panic ( HChar* s )
{
  printf("\npanic: %s\n", s);
  failure_exit();
}

static
IRSB* ac_instrument (IRSB* bb_in, VexGuestLayout* layout, IRType hWordTy )
{
/* Use this rather than eg. -1 because it's a UInt. */
#define INVALID_DATA_SIZE   999999

   Int         i;
   Int         sz;
   IRCallee*   helper;
   IRStmt*     st;
   IRExpr*     data;
   IRExpr*     addr;
   Bool        needSz;

   /* Set up BB */
   IRSB* bb     = emptyIRSB();
   bb->tyenv    = dopyIRTypeEnv(bb_in->tyenv);
   bb->next     = dopyIRExpr(bb_in->next);
   bb->jumpkind = bb_in->jumpkind;

   /* No loads to consider in ->next. */
   assert(isIRAtom(bb_in->next));

   for (i = 0; i <  bb_in->stmts_used; i++) {
      st = bb_in->stmts[i];
      if (!st) continue;

      switch (st->tag) {

         case Ist_Tmp:
            data = st->Ist.Tmp.data;
            if (data->tag == Iex_LDle) {
               addr = data->Iex.LDle.addr;
               sz = sizeofIRType(data->Iex.LDle.ty);
               needSz = False;
               switch (sz) {
                  case 4: helper = mkIRCallee(1, "ac_helperc_LOAD4",
                                                 (void*)0x12345601); break;
                  case 2: helper = mkIRCallee(0, "ac_helperc_LOAD2",
                                                 (void*)0x12345602); break;
                  case 1: helper = mkIRCallee(1, "ac_helperc_LOAD1",
                                                 (void*)0x12345603); break;
                  default: helper = mkIRCallee(0, "ac_helperc_LOADN",
                                                  (void*)0x12345604);
                                                  needSz = True; break;
               }
               if (needSz) {
                  addStmtToIRSB(
                     bb,
                     IRStmt_Dirty(
                        unsafeIRDirty_0_N( helper->regparms,
                                           helper->name, helper->addr,
                                           mkIRExprVec_2(addr, mkIRExpr_HWord(sz)))
                  ));
               } else {
                  addStmtToIRSB(
                     bb,
                     IRStmt_Dirty(
                        unsafeIRDirty_0_N( helper->regparms,
                                           helper->name, helper->addr,
                                           mkIRExprVec_1(addr) )
                  ));
               }
            }
            break;

         case Ist_STle:
            data = st->Ist.STle.data;
            addr = st->Ist.STle.addr;
            assert(isIRAtom(data));
            assert(isIRAtom(addr));
            sz = sizeofIRType(typeOfIRExpr(bb_in->tyenv, data));
            needSz = False;
            switch (sz) {
               case 4: helper = mkIRCallee(1, "ac_helperc_STORE4",
                                              (void*)0x12345605); break;
               case 2: helper = mkIRCallee(0, "ac_helperc_STORE2",
                                              (void*)0x12345606); break;
               case 1: helper = mkIRCallee(1, "ac_helperc_STORE1",
                                              (void*)0x12345607); break;
               default: helper = mkIRCallee(0, "ac_helperc_STOREN",
                                               (void*)0x12345608);
                                               needSz = True; break;
            }
            if (needSz) {
               addStmtToIRSB(
                  bb,
                  IRStmt_Dirty(
                     unsafeIRDirty_0_N( helper->regparms,
                                        helper->name, helper->addr,
                                        mkIRExprVec_2(addr, mkIRExpr_HWord(sz)))
               ));
            } else {
               addStmtToIRSB(
                  bb,
                  IRStmt_Dirty(
                     unsafeIRDirty_0_N( helper->regparms,
                                        helper->name, helper->addr,
                                        mkIRExprVec_1(addr) )
               ));
            }
            break;

         case Ist_Put:
            assert(isIRAtom(st->Ist.Put.data));
            break;

         case Ist_PutI:
            assert(isIRAtom(st->Ist.PutI.ix));
            assert(isIRAtom(st->Ist.PutI.data));
            break;

         case Ist_Exit:
            assert(isIRAtom(st->Ist.Exit.guard));
            break;

         case Ist_Dirty:
            /* If the call doesn't interact with memory, we ain't
               interested. */
            if (st->Ist.Dirty.details->mFx == Ifx_None)
               break;
            goto unhandled;

         default:
         unhandled:
            printf("\n");
            ppIRStmt(st);
            printf("\n");
            panic("addrcheck: unhandled IRStmt");
      }

      addStmtToIRSB( bb, dopyIRStmt(st));
   }

   return bb;
}
#endif /* UNUSED */

//////////////////////////////////////////////////////////////////////
//////////////////////////////////////////////////////////////////////
//////////////////////////////////////////////////////////////////////
//////////////////////////////////////////////////////////////////////
//////////////////////////////////////////////////////////////////////
//////////////////////////////////////////////////////////////////////
//////////////////////////////////////////////////////////////////////
//////////////////////////////////////////////////////////////////////

#if 1 /* memcheck instrumentation (used) */

static
__attribute__((noreturn))
void panic ( HChar* s )
{
  printf("\npanic: %s\n", s);
  failure_exit();
}

#define tl_assert(xxx) assert(xxx)
#define VG_(xxxx) xxxx
#define tool_panic(zzz) panic(zzz)
#define MC_(zzzz) MC_##zzzz
#define TL_(zzzz) SK_##zzzz


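/* Stub bodies for the memcheck runtime helpers.  This harness only
   translates code and never runs it, so these exist purely to give
   the instrumentation real addresses to reference. */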
static void MC_helperc_complain_undef ( void );
static void MC_helperc_LOADV8 ( void );
static void MC_helperc_LOADV4 ( void );
static void MC_helperc_LOADV2 ( void );
static void MC_helperc_LOADV1 ( void );
static void MC_helperc_STOREV8( void );
static void MC_helperc_STOREV4( void );
static void MC_helperc_STOREV2( void );
static void MC_helperc_STOREV1( void );
static void MC_helperc_value_check0_fail( void );
static void MC_helperc_value_check1_fail( void );
static void MC_helperc_value_check4_fail( void );

static void MC_helperc_complain_undef ( void ) { }
static void MC_helperc_LOADV8 ( void ) { }
static void MC_helperc_LOADV4 ( void ) { }
static void MC_helperc_LOADV2 ( void ) { }
static void MC_helperc_LOADV1 ( void ) { }
static void MC_helperc_STOREV8( void ) { }
static void MC_helperc_STOREV4( void ) { }
static void MC_helperc_STOREV2( void ) { }
static void MC_helperc_STOREV1( void ) { }
static void MC_helperc_value_check0_fail( void ) { }
static void MC_helperc_value_check1_fail( void ) { }
static void MC_helperc_value_check4_fail( void ) { }


/*--------------------------------------------------------------------*/
/*--- Instrument IR to perform memory checking operations.         ---*/
/*---                                               mc_translate.c ---*/
/*--------------------------------------------------------------------*/

/*
   This file is part of MemCheck, a heavyweight Valgrind tool for
   detecting memory errors.

   Copyright (C) 2000-2011 Julian Seward
      jseward@acm.org

   This program is free software; you can redistribute it and/or
   modify it under the terms of the GNU General Public License as
   published by the Free Software Foundation; either version 2 of the
   License, or (at your option) any later version.

   This program is distributed in the hope that it will be useful, but
   WITHOUT ANY WARRANTY; without even the implied warranty of
   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
   General Public License for more details.

   You should have received a copy of the GNU General Public License
   along with this program; if not, write to the Free Software
   Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA
   02111-1307, USA.

   The GNU General Public License is contained in the file COPYING.
*/

//#include "mc_include.h"


/*------------------------------------------------------------*/
/*--- Forward decls                                        ---*/
/*------------------------------------------------------------*/

struct _MCEnv;

static IRType  shadowType ( IRType ty );
static IRExpr* expr2vbits ( struct _MCEnv* mce, IRExpr* e );


/*------------------------------------------------------------*/
/*--- Memcheck running state, and tmp management.          ---*/
/*------------------------------------------------------------*/

/* Carries around state during memcheck instrumentation. */
typedef
   struct _MCEnv {
      /* MODIFIED: the bb being constructed.  IRStmts are added. */
      IRSB* bb;

      /* MODIFIED: a table [0 .. #temps_in_original_bb-1] which maps
         original temps to their current shadow temp.  Initially all
         entries are IRTemp_INVALID.  Entries are added lazily since
         many original temps are not used due to optimisation prior
         to instrumentation.  Note that floating point original tmps
         are shadowed by integer tmps of the same size, and Bit-typed
         original tmps are shadowed by the type Ity_I8.  See comment
         below. */
      IRTemp* tmpMap;
      Int     n_originalTmps; /* for range checking */

      /* READONLY: the guest layout.  This indicates which parts of
         the guest state should be regarded as 'always defined'. */
      VexGuestLayout* layout;
      /* READONLY: the host word type.  Needed for constructing
         arguments of type 'HWord' to be passed to helper functions.
         Ity_I32 or Ity_I64 only. */
      IRType hWordTy;
   }
   MCEnv;

/* SHADOW TMP MANAGEMENT.  Shadow tmps are allocated lazily (on
   demand), as they are encountered.  This is for two reasons.

   (1) (less important reason): Many original tmps are unused due to
   initial IR optimisation, and we do not want to waste space in
   tables tracking them.

   Shadow IRTemps are therefore allocated on demand.  mce.tmpMap is a
   table indexed [0 .. n_originalTmps-1], which gives the current
   shadow for each original tmp, or IRTemp_INVALID if none is so far
   assigned.  It is necessary to support making multiple assignments
   to a shadow -- specifically, after testing a shadow for
   definedness, it needs to be made defined.  But IR's SSA property
   disallows this.

   (2) (more important reason): Therefore, when a shadow needs to get
   a new value, a new temporary is created, the value is assigned to
   that, and the tmpMap is updated to reflect the new binding.

   A corollary is that if the tmpMap maps a given tmp to
   IRTemp_INVALID and we are hoping to read that shadow tmp, it means
   there's a read-before-write error in the original tmps.  The IR
   sanity checker should catch all such anomalies, however.
*/

/* Find the tmp currently shadowing the given original tmp.  If none
   so far exists, allocate one.  */
static IRTemp findShadowTmp ( MCEnv* mce, IRTemp orig )
{
   tl_assert(orig < mce->n_originalTmps);
   if (mce->tmpMap[orig] == IRTemp_INVALID) {
      mce->tmpMap[orig]
         = newIRTemp(mce->bb->tyenv,
                     shadowType(mce->bb->tyenv->types[orig]));
   }
   return mce->tmpMap[orig];
}

/* Allocate a new shadow for the given original tmp.  This means any
   previous shadow is abandoned.  This is needed because it is
   necessary to give a new value to a shadow once it has been tested
   for undefinedness, but unfortunately IR's SSA property disallows
   this.  Instead we must abandon the old shadow, allocate a new one
   and use that instead. */
static void newShadowTmp ( MCEnv* mce, IRTemp orig )
{
   tl_assert(orig < mce->n_originalTmps);
   mce->tmpMap[orig]
      = newIRTemp(mce->bb->tyenv,
                  shadowType(mce->bb->tyenv->types[orig]));
}


/*------------------------------------------------------------*/
/*--- IRAtoms -- a subset of IRExprs                       ---*/
/*------------------------------------------------------------*/

/* An atom is either an IRExpr_Const or an IRExpr_Tmp, as defined by
   isIRAtom() in libvex_ir.h.  Because this instrumenter expects flat
   input, most of this code deals in atoms.  Usefully, a value atom
   always has a V-value which is also an atom: constants are shadowed
   by constants, and temps are shadowed by the corresponding shadow
   temporary. */

typedef  IRExpr  IRAtom;

/* (used for sanity checks only): is this an atom which looks
   like it's from original code? */
static Bool isOriginalAtom ( MCEnv* mce, IRAtom* a1 )
{
   if (a1->tag == Iex_Const)
      return True;
   if (a1->tag == Iex_RdTmp && a1->Iex.RdTmp.tmp < mce->n_originalTmps)
      return True;
   return False;
}

/* (used for sanity checks only): is this an atom which looks
   like it's from shadow code? */
static Bool isShadowAtom ( MCEnv* mce, IRAtom* a1 )
{
   if (a1->tag == Iex_Const)
      return True;
   if (a1->tag == Iex_RdTmp && a1->Iex.RdTmp.tmp >= mce->n_originalTmps)
      return True;
   return False;
}

/* (used for sanity checks only): check that both args are atoms and
   are identically-kinded. */
static Bool sameKindedAtoms ( IRAtom* a1, IRAtom* a2 )
{
   if (a1->tag == Iex_RdTmp && a2->tag == Iex_RdTmp)
      return True;
   if (a1->tag == Iex_Const && a2->tag == Iex_Const)
      return True;
   return False;
}


/*------------------------------------------------------------*/
/*--- Type management                                      ---*/
/*------------------------------------------------------------*/

/* Shadow state is always accessed using integer types.  This returns
   an integer type with the same size (as per sizeofIRType) as the
   given type.  The only valid shadow types are Bit, I8, I16, I32,
   I64, V128. */

static IRType shadowType ( IRType ty )
{
   switch (ty) {
      case Ity_I1:
      case Ity_I8:
      case Ity_I16:
      case Ity_I32:
      case Ity_I64:  return ty;
      case Ity_F32:  return Ity_I32;
      case Ity_F64:  return Ity_I64;
      case Ity_V128: return Ity_V128;
      default: ppIRType(ty);
               VG_(tool_panic)("memcheck:shadowType");
   }
}

/* Produce a 'defined' value of the given shadow type.  Should only be
   supplied shadow types (Bit/I8/I16/I32/I64/V128). */
static IRExpr* definedOfType ( IRType ty ) {
   switch (ty) {
      case Ity_I1:   return IRExpr_Const(IRConst_U1(False));
      case Ity_I8:   return IRExpr_Const(IRConst_U8(0));
      case Ity_I16:  return IRExpr_Const(IRConst_U16(0));
      case Ity_I32:  return IRExpr_Const(IRConst_U32(0));
      case Ity_I64:  return IRExpr_Const(IRConst_U64(0));
      case Ity_V128: return IRExpr_Const(IRConst_V128(0x0000));
      default:       VG_(tool_panic)("memcheck:definedOfType");
   }
}


/*------------------------------------------------------------*/
/*--- Constructing IR fragments                            ---*/
/*------------------------------------------------------------*/

/* assign value to tmp */
#define assign(_bb,_tmp,_expr)   \
   addStmtToIRSB((_bb), IRStmt_WrTmp((_tmp),(_expr)))

/* add stmt to a bb */
#define stmt(_bb,_stmt)    \
   addStmtToIRSB((_bb), (_stmt))

/* build various kinds of expressions */
#define binop(_op, _arg1, _arg2) IRExpr_Binop((_op),(_arg1),(_arg2))
#define unop(_op, _arg)          IRExpr_Unop((_op),(_arg))
#define mkU8(_n)                 IRExpr_Const(IRConst_U8(_n))
#define mkU16(_n)                IRExpr_Const(IRConst_U16(_n))
#define mkU32(_n)                IRExpr_Const(IRConst_U32(_n))
#define mkU64(_n)                IRExpr_Const(IRConst_U64(_n))
#define mkV128(_n)               IRExpr_Const(IRConst_V128(_n))
#define mkexpr(_tmp)             IRExpr_RdTmp((_tmp))

/* bind the given expression to a new temporary, and return the
   temporary.  This effectively converts an arbitrary expression into
   an atom. */
static IRAtom* assignNew ( MCEnv* mce, IRType ty, IRExpr* e ) {
   IRTemp t = newIRTemp(mce->bb->tyenv, ty);
   assign(mce->bb, t, e);
   return mkexpr(t);
}


/*------------------------------------------------------------*/
/*--- Constructing definedness primitive ops               ---*/
/*------------------------------------------------------------*/

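/* Throughout, V bits follow memcheck's convention: 0 means 'defined',
   1 means 'undefined'.  Hence defined-if-either-defined is bitwise
   AND of the two shadows, and undefined-if-either-undefined is
   bitwise OR.  E.g. with 8-bit shadows 0x0F and 0x33: DifD gives
   0x03, UifU gives 0x3F. */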
/* --------- Defined-if-either-defined --------- */

static IRAtom* mkDifD8 ( MCEnv* mce, IRAtom* a1, IRAtom* a2 ) {
   tl_assert(isShadowAtom(mce,a1));
   tl_assert(isShadowAtom(mce,a2));
   return assignNew(mce, Ity_I8, binop(Iop_And8, a1, a2));
}

static IRAtom* mkDifD16 ( MCEnv* mce, IRAtom* a1, IRAtom* a2 ) {
   tl_assert(isShadowAtom(mce,a1));
   tl_assert(isShadowAtom(mce,a2));
   return assignNew(mce, Ity_I16, binop(Iop_And16, a1, a2));
}

static IRAtom* mkDifD32 ( MCEnv* mce, IRAtom* a1, IRAtom* a2 ) {
   tl_assert(isShadowAtom(mce,a1));
   tl_assert(isShadowAtom(mce,a2));
   return assignNew(mce, Ity_I32, binop(Iop_And32, a1, a2));
}

static IRAtom* mkDifD64 ( MCEnv* mce, IRAtom* a1, IRAtom* a2 ) {
   tl_assert(isShadowAtom(mce,a1));
   tl_assert(isShadowAtom(mce,a2));
   return assignNew(mce, Ity_I64, binop(Iop_And64, a1, a2));
}

static IRAtom* mkDifDV128 ( MCEnv* mce, IRAtom* a1, IRAtom* a2 ) {
   tl_assert(isShadowAtom(mce,a1));
   tl_assert(isShadowAtom(mce,a2));
   return assignNew(mce, Ity_V128, binop(Iop_AndV128, a1, a2));
}

/* --------- Undefined-if-either-undefined --------- */

static IRAtom* mkUifU8 ( MCEnv* mce, IRAtom* a1, IRAtom* a2 ) {
   tl_assert(isShadowAtom(mce,a1));
   tl_assert(isShadowAtom(mce,a2));
   return assignNew(mce, Ity_I8, binop(Iop_Or8, a1, a2));
}

static IRAtom* mkUifU16 ( MCEnv* mce, IRAtom* a1, IRAtom* a2 ) {
   tl_assert(isShadowAtom(mce,a1));
   tl_assert(isShadowAtom(mce,a2));
   return assignNew(mce, Ity_I16, binop(Iop_Or16, a1, a2));
}

static IRAtom* mkUifU32 ( MCEnv* mce, IRAtom* a1, IRAtom* a2 ) {
   tl_assert(isShadowAtom(mce,a1));
   tl_assert(isShadowAtom(mce,a2));
   return assignNew(mce, Ity_I32, binop(Iop_Or32, a1, a2));
}

static IRAtom* mkUifU64 ( MCEnv* mce, IRAtom* a1, IRAtom* a2 ) {
   tl_assert(isShadowAtom(mce,a1));
   tl_assert(isShadowAtom(mce,a2));
   return assignNew(mce, Ity_I64, binop(Iop_Or64, a1, a2));
}

static IRAtom* mkUifUV128 ( MCEnv* mce, IRAtom* a1, IRAtom* a2 ) {
   tl_assert(isShadowAtom(mce,a1));
   tl_assert(isShadowAtom(mce,a2));
   return assignNew(mce, Ity_V128, binop(Iop_OrV128, a1, a2));
}

static IRAtom* mkUifU ( MCEnv* mce, IRType vty, IRAtom* a1, IRAtom* a2 ) {
   switch (vty) {
      case Ity_I8:   return mkUifU8(mce, a1, a2);
      case Ity_I16:  return mkUifU16(mce, a1, a2);
      case Ity_I32:  return mkUifU32(mce, a1, a2);
      case Ity_I64:  return mkUifU64(mce, a1, a2);
      case Ity_V128: return mkUifUV128(mce, a1, a2);
      default:
         VG_(printf)("\n"); ppIRType(vty); VG_(printf)("\n");
         VG_(tool_panic)("memcheck:mkUifU");
   }
}

/* --------- The Left-family of operations. --------- */

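/* mkLeftN(v) computes v | -v: every bit at and above the lowest
   undefined (1) bit becomes undefined.  A quick worked example on 8
   bits: v = 0b00010100 gives -v = 0b11101100, so v | -v = 0b11111100
   -- everything from bit 2 upwards is smeared undefined.  This
   models the worst-case leftward spread of carries/borrows from an
   undefined bit. */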
static IRAtom* mkLeft8 ( MCEnv* mce, IRAtom* a1 ) {
   tl_assert(isShadowAtom(mce,a1));
   /* It's safe to duplicate a1 since it's only an atom */
   return assignNew(mce, Ity_I8,
                    binop(Iop_Or8, a1,
                          assignNew(mce, Ity_I8,
                                    /* unop(Iop_Neg8, a1)))); */
                                    binop(Iop_Sub8, mkU8(0), a1) )));
}

static IRAtom* mkLeft16 ( MCEnv* mce, IRAtom* a1 ) {
   tl_assert(isShadowAtom(mce,a1));
   /* It's safe to duplicate a1 since it's only an atom */
   return assignNew(mce, Ity_I16,
                    binop(Iop_Or16, a1,
                          assignNew(mce, Ity_I16,
                                    /* unop(Iop_Neg16, a1)))); */
                                    binop(Iop_Sub16, mkU16(0), a1) )));
}

static IRAtom* mkLeft32 ( MCEnv* mce, IRAtom* a1 ) {
   tl_assert(isShadowAtom(mce,a1));
   /* It's safe to duplicate a1 since it's only an atom */
   return assignNew(mce, Ity_I32,
                    binop(Iop_Or32, a1,
                          assignNew(mce, Ity_I32,
                                    /* unop(Iop_Neg32, a1)))); */
                                    binop(Iop_Sub32, mkU32(0), a1) )));
}

/* --------- 'Improvement' functions for AND/OR. --------- */

/* ImproveAND(data, vbits) = data OR vbits.  Defined (0) 0-bits of
   data give defined (0); all other combinations give undefined (1).
*/
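/* Rationale, briefly: if a bit of data is a defined 0, then the
   corresponding bit of (data AND anything) is definitely 0, hence
   definitely defined.  E.g. data = 0x0F with vbits = 0x30:
   data | vbits = 0x3F, so this term claims only bits 7 and 6 -- the
   bits where data is 0 and defined -- as defined. */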
static IRAtom* mkImproveAND8 ( MCEnv* mce, IRAtom* data, IRAtom* vbits )
{
   tl_assert(isOriginalAtom(mce, data));
   tl_assert(isShadowAtom(mce, vbits));
   tl_assert(sameKindedAtoms(data, vbits));
   return assignNew(mce, Ity_I8, binop(Iop_Or8, data, vbits));
}

static IRAtom* mkImproveAND16 ( MCEnv* mce, IRAtom* data, IRAtom* vbits )
{
   tl_assert(isOriginalAtom(mce, data));
   tl_assert(isShadowAtom(mce, vbits));
   tl_assert(sameKindedAtoms(data, vbits));
   return assignNew(mce, Ity_I16, binop(Iop_Or16, data, vbits));
}

static IRAtom* mkImproveAND32 ( MCEnv* mce, IRAtom* data, IRAtom* vbits )
{
   tl_assert(isOriginalAtom(mce, data));
   tl_assert(isShadowAtom(mce, vbits));
   tl_assert(sameKindedAtoms(data, vbits));
   return assignNew(mce, Ity_I32, binop(Iop_Or32, data, vbits));
}

static IRAtom* mkImproveAND64 ( MCEnv* mce, IRAtom* data, IRAtom* vbits )
{
   tl_assert(isOriginalAtom(mce, data));
   tl_assert(isShadowAtom(mce, vbits));
   tl_assert(sameKindedAtoms(data, vbits));
   return assignNew(mce, Ity_I64, binop(Iop_Or64, data, vbits));
}

static IRAtom* mkImproveANDV128 ( MCEnv* mce, IRAtom* data, IRAtom* vbits )
{
   tl_assert(isOriginalAtom(mce, data));
   tl_assert(isShadowAtom(mce, vbits));
   tl_assert(sameKindedAtoms(data, vbits));
   return assignNew(mce, Ity_V128, binop(Iop_OrV128, data, vbits));
}

/* ImproveOR(data, vbits) = ~data OR vbits.  Defined (0) 1-bits of
   data give defined (0); all other combinations give undefined (1).
*/
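/* Symmetrically: a defined 1 bit in data forces the corresponding
   bit of (data OR anything) to 1, so that result bit is defined
   regardless of the other operand's V bits. */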
static IRAtom* mkImproveOR8 ( MCEnv* mce, IRAtom* data, IRAtom* vbits )
{
   tl_assert(isOriginalAtom(mce, data));
   tl_assert(isShadowAtom(mce, vbits));
   tl_assert(sameKindedAtoms(data, vbits));
   return assignNew(
             mce, Ity_I8,
             binop(Iop_Or8,
                   assignNew(mce, Ity_I8, unop(Iop_Not8, data)),
                   vbits) );
}

static IRAtom* mkImproveOR16 ( MCEnv* mce, IRAtom* data, IRAtom* vbits )
{
   tl_assert(isOriginalAtom(mce, data));
   tl_assert(isShadowAtom(mce, vbits));
   tl_assert(sameKindedAtoms(data, vbits));
   return assignNew(
             mce, Ity_I16,
             binop(Iop_Or16,
                   assignNew(mce, Ity_I16, unop(Iop_Not16, data)),
                   vbits) );
}

static IRAtom* mkImproveOR32 ( MCEnv* mce, IRAtom* data, IRAtom* vbits )
{
   tl_assert(isOriginalAtom(mce, data));
   tl_assert(isShadowAtom(mce, vbits));
   tl_assert(sameKindedAtoms(data, vbits));
   return assignNew(
             mce, Ity_I32,
             binop(Iop_Or32,
                   assignNew(mce, Ity_I32, unop(Iop_Not32, data)),
                   vbits) );
}

static IRAtom* mkImproveOR64 ( MCEnv* mce, IRAtom* data, IRAtom* vbits )
{
   tl_assert(isOriginalAtom(mce, data));
   tl_assert(isShadowAtom(mce, vbits));
   tl_assert(sameKindedAtoms(data, vbits));
   return assignNew(
             mce, Ity_I64,
             binop(Iop_Or64,
                   assignNew(mce, Ity_I64, unop(Iop_Not64, data)),
                   vbits) );
}

static IRAtom* mkImproveORV128 ( MCEnv* mce, IRAtom* data, IRAtom* vbits )
{
   tl_assert(isOriginalAtom(mce, data));
   tl_assert(isShadowAtom(mce, vbits));
   tl_assert(sameKindedAtoms(data, vbits));
   return assignNew(
             mce, Ity_V128,
             binop(Iop_OrV128,
                   assignNew(mce, Ity_V128, unop(Iop_NotV128, data)),
                   vbits) );
}

/* --------- Pessimising casts. --------- */

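/* Worked example: pessimising an I8 shadow 0x04 (only bit 2
   undefined) to I32 first compares it against zero, giving the I1
   value 1 (some bit undefined), then sign-extends that to
   0xFFFFFFFF: the entire 32-bit result is treated as undefined.
   Cheap, and safely over-approximate. */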
static IRAtom* mkPCastTo( MCEnv* mce, IRType dst_ty, IRAtom* vbits )
{
   IRType  ty;
   IRAtom* tmp1;
   /* Note, dst_ty is a shadow type, not an original type. */
   /* First of all, collapse vbits down to a single bit. */
   tl_assert(isShadowAtom(mce,vbits));
   ty   = typeOfIRExpr(mce->bb->tyenv, vbits);
   tmp1 = NULL;
   switch (ty) {
      case Ity_I1:
         tmp1 = vbits;
         break;
      case Ity_I8:
         tmp1 = assignNew(mce, Ity_I1, binop(Iop_CmpNE8, vbits, mkU8(0)));
         break;
      case Ity_I16:
         tmp1 = assignNew(mce, Ity_I1, binop(Iop_CmpNE16, vbits, mkU16(0)));
         break;
      case Ity_I32:
         tmp1 = assignNew(mce, Ity_I1, binop(Iop_CmpNE32, vbits, mkU32(0)));
         break;
      case Ity_I64:
         tmp1 = assignNew(mce, Ity_I1, binop(Iop_CmpNE64, vbits, mkU64(0)));
         break;
      default:
         VG_(tool_panic)("mkPCastTo(1)");
   }
   tl_assert(tmp1);
   /* Now widen up to the dst type. */
   switch (dst_ty) {
      case Ity_I1:
         return tmp1;
      case Ity_I8:
         return assignNew(mce, Ity_I8, unop(Iop_1Sto8, tmp1));
      case Ity_I16:
         return assignNew(mce, Ity_I16, unop(Iop_1Sto16, tmp1));
      case Ity_I32:
         return assignNew(mce, Ity_I32, unop(Iop_1Sto32, tmp1));
      case Ity_I64:
         return assignNew(mce, Ity_I64, unop(Iop_1Sto64, tmp1));
      case Ity_V128:
         tmp1 = assignNew(mce, Ity_I64,  unop(Iop_1Sto64, tmp1));
         tmp1 = assignNew(mce, Ity_V128, binop(Iop_64HLtoV128, tmp1, tmp1));
         return tmp1;
      default:
         ppIRType(dst_ty);
         VG_(tool_panic)("mkPCastTo(2)");
   }
}


/*------------------------------------------------------------*/
/*--- Emit a test and complaint if something is undefined. ---*/
/*------------------------------------------------------------*/

/* Set the annotations on a dirty helper to indicate that the stack
   pointer and instruction pointers might be read.  This is the
   behaviour of all 'emit-a-complaint' style functions we might
   call. */

static void setHelperAnns ( MCEnv* mce, IRDirty* di ) {
   di->nFxState = 2;
   di->fxState[0].fx     = Ifx_Read;
   di->fxState[0].offset = mce->layout->offset_SP;
   di->fxState[0].size   = mce->layout->sizeof_SP;
   di->fxState[1].fx     = Ifx_Read;
   di->fxState[1].offset = mce->layout->offset_IP;
   di->fxState[1].size   = mce->layout->sizeof_IP;
}


/* Check the supplied **original** atom for undefinedness, and emit a
   complaint if so.  Once that happens, mark it as defined.  This is
   possible because the atom is either a tmp or literal.  If it's a
   tmp, it will be shadowed by a tmp, and so we can set the shadow to
   be defined.  In fact as mentioned above, we will have to allocate a
   new tmp to carry the new 'defined' shadow value, and update the
   original->tmp mapping accordingly; we cannot simply assign a new
   value to an existing shadow tmp as this breaks SSAness -- resulting
   in the post-instrumentation sanity checker spluttering in disapproval.
*/
static void complainIfUndefined ( MCEnv* mce, IRAtom* atom )
{
   IRAtom*  vatom;
   IRType   ty;
   Int      sz;
   IRDirty* di;
   IRAtom*  cond;

   /* Since the original expression is atomic, there's no duplicated
      work generated by making multiple V-expressions for it.  So we
      don't really care about the possibility that someone else may
      also create a V-interpretation for it. */
   tl_assert(isOriginalAtom(mce, atom));
   vatom = expr2vbits( mce, atom );
   tl_assert(isShadowAtom(mce, vatom));
   tl_assert(sameKindedAtoms(atom, vatom));

   ty = typeOfIRExpr(mce->bb->tyenv, vatom);

   /* sz is only used for constructing the error message */
   sz = ty==Ity_I1 ? 0 : sizeofIRType(ty);

   cond = mkPCastTo( mce, Ity_I1, vatom );
   /* cond will be 0 if all defined, and 1 if any not defined. */

   switch (sz) {
      case 0:
         di = unsafeIRDirty_0_N( 0/*regparms*/,
                                 "MC_(helperc_value_check0_fail)",
                                 &MC_(helperc_value_check0_fail),
                                 mkIRExprVec_0()
                               );
         break;
      case 1:
         di = unsafeIRDirty_0_N( 0/*regparms*/,
                                 "MC_(helperc_value_check1_fail)",
                                 &MC_(helperc_value_check1_fail),
                                 mkIRExprVec_0()
                               );
         break;
      case 4:
         di = unsafeIRDirty_0_N( 0/*regparms*/,
                                 "MC_(helperc_value_check4_fail)",
                                 &MC_(helperc_value_check4_fail),
                                 mkIRExprVec_0()
                               );
         break;
      default:
         di = unsafeIRDirty_0_N( 1/*regparms*/,
                                 "MC_(helperc_complain_undef)",
                                 &MC_(helperc_complain_undef),
                                 mkIRExprVec_1( mkIRExpr_HWord( sz ))
                               );
         break;
   }
   di->guard = cond;
   setHelperAnns( mce, di );
   stmt( mce->bb, IRStmt_Dirty(di));

   /* Set the shadow tmp to be defined.  First, update the
      orig->shadow tmp mapping to reflect the fact that this shadow is
      getting a new value. */
   tl_assert(isIRAtom(vatom));
   /* sameKindedAtoms ... */
   if (vatom->tag == Iex_RdTmp) {
      tl_assert(atom->tag == Iex_RdTmp);
      newShadowTmp(mce, atom->Iex.RdTmp.tmp);
      assign(mce->bb, findShadowTmp(mce, atom->Iex.RdTmp.tmp),
                      definedOfType(ty));
   }
}


/*------------------------------------------------------------*/
/*--- Shadowing PUTs/GETs, and indexed variants thereof    ---*/
/*------------------------------------------------------------*/

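/* The shadow guest state is laid out as a copy of the real guest
   state, displaced by layout->total_sizeB.  Purely as an
   illustration, if total_sizeB were 1000, the V bits for a 4-byte
   register slice at guest offset 24 would live at offset 1024. */
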
/* Examine the always-defined sections declared in layout to see if
   the (offset,size) section is within one.  Note, it is an error to
   partially fall into such a region: (offset,size) should either be
   completely in such a region or completely not-in such a region.
*/
static Bool isAlwaysDefd ( MCEnv* mce, Int offset, Int size )
{
   Int minoffD, maxoffD, i;
   Int minoff = offset;
   Int maxoff = minoff + size - 1;
   tl_assert((minoff & ~0xFFFF) == 0);
   tl_assert((maxoff & ~0xFFFF) == 0);

   for (i = 0; i < mce->layout->n_alwaysDefd; i++) {
      minoffD = mce->layout->alwaysDefd[i].offset;
      maxoffD = minoffD + mce->layout->alwaysDefd[i].size - 1;
      tl_assert((minoffD & ~0xFFFF) == 0);
      tl_assert((maxoffD & ~0xFFFF) == 0);

      if (maxoff < minoffD || maxoffD < minoff)
         continue; /* no overlap */
      if (minoff >= minoffD && maxoff <= maxoffD)
         return True; /* completely contained in an always-defd section */

      VG_(tool_panic)("memcheck:isAlwaysDefd:partial overlap");
   }
   return False; /* could not find any containing section */
}


/* Generate into bb suitable actions to shadow this Put.  If the state
   slice is marked 'always defined', do nothing.  Otherwise, write the
   supplied V bits to the shadow state.  We can pass in either an
   original atom or a V-atom, but not both.  In the former case the
   relevant V-bits are then generated from the original.
*/
static
void do_shadow_PUT ( MCEnv* mce,  Int offset,
                     IRAtom* atom, IRAtom* vatom )
{
   IRType ty;
   if (atom) {
      tl_assert(!vatom);
      tl_assert(isOriginalAtom(mce, atom));
      vatom = expr2vbits( mce, atom );
   } else {
      tl_assert(vatom);
      tl_assert(isShadowAtom(mce, vatom));
   }

   ty = typeOfIRExpr(mce->bb->tyenv, vatom);
   tl_assert(ty != Ity_I1);
   if (isAlwaysDefd(mce, offset, sizeofIRType(ty))) {
      /* later: no ... */
      /* emit code to emit a complaint if any of the vbits are 1. */
      /* complainIfUndefined(mce, atom); */
   } else {
      /* Do a plain shadow Put. */
      stmt( mce->bb, IRStmt_Put( offset + mce->layout->total_sizeB, vatom ) );
   }
}


/* Generate into bb suitable actions to shadow this PutI (passed in
   in pieces).  As with do_shadow_PUT, if the state slice is marked
   'always defined', do nothing; otherwise write the V bits to the
   shadow area.
*/
static
void do_shadow_PUTI ( MCEnv* mce,
                      IRRegArray* descr, IRAtom* ix, Int bias, IRAtom* atom )
{
   IRAtom* vatom;
   IRType  ty, tyS;
   Int     arrSize;

   tl_assert(isOriginalAtom(mce,atom));
   vatom = expr2vbits( mce, atom );
   tl_assert(sameKindedAtoms(atom, vatom));
   ty   = descr->elemTy;
   tyS  = shadowType(ty);
   arrSize = descr->nElems * sizeofIRType(ty);
   tl_assert(ty != Ity_I1);
   tl_assert(isOriginalAtom(mce,ix));
   complainIfUndefined(mce,ix);
   if (isAlwaysDefd(mce, descr->base, arrSize)) {
      /* later: no ... */
      /* emit code to emit a complaint if any of the vbits are 1. */
      /* complainIfUndefined(mce, atom); */
   } else {
      /* Do a cloned version of the Put that refers to the shadow
         area. */
      IRRegArray* new_descr
         = mkIRRegArray( descr->base + mce->layout->total_sizeB,
                         tyS, descr->nElems);
      stmt( mce->bb, IRStmt_PutI( new_descr, ix, bias, vatom ));
   }
}


/* Return an expression which contains the V bits corresponding to the
   given GET (passed in in pieces).
*/
static
IRExpr* shadow_GET ( MCEnv* mce, Int offset, IRType ty )
{
   IRType tyS = shadowType(ty);
   tl_assert(ty != Ity_I1);
   if (isAlwaysDefd(mce, offset, sizeofIRType(ty))) {
      /* Always defined, return all zeroes of the relevant type */
      return definedOfType(tyS);
   } else {
      /* return a cloned version of the Get that refers to the shadow
         area. */
      return IRExpr_Get( offset + mce->layout->total_sizeB, tyS );
   }
}


/* Return an expression which contains the V bits corresponding to the
   given GETI (passed in in pieces).
*/
static
IRExpr* shadow_GETI ( MCEnv* mce, IRRegArray* descr, IRAtom* ix, Int bias )
{
   IRType ty   = descr->elemTy;
   IRType tyS  = shadowType(ty);
   Int arrSize = descr->nElems * sizeofIRType(ty);
   tl_assert(ty != Ity_I1);
   tl_assert(isOriginalAtom(mce,ix));
   complainIfUndefined(mce,ix);
   if (isAlwaysDefd(mce, descr->base, arrSize)) {
      /* Always defined, return all zeroes of the relevant type */
      return definedOfType(tyS);
   } else {
      /* return a cloned version of the Get that refers to the shadow
         area. */
      IRRegArray* new_descr
         = mkIRRegArray( descr->base + mce->layout->total_sizeB,
                         tyS, descr->nElems);
      return IRExpr_GetI( new_descr, ix, bias );
   }
}


/*------------------------------------------------------------*/
/*--- Generating approximations for unknown operations,    ---*/
/*--- using lazy-propagate semantics                       ---*/
/*------------------------------------------------------------*/

/* Lazy propagation of undefinedness from two values, resulting in the
   specified shadow type.
*/
static
IRAtom* mkLazy2 ( MCEnv* mce, IRType finalVty, IRAtom* va1, IRAtom* va2 )
{
   /* force everything via 32-bit intermediaries. */
   IRAtom* at;
   tl_assert(isShadowAtom(mce,va1));
   tl_assert(isShadowAtom(mce,va2));
   at = mkPCastTo(mce, Ity_I32, va1);
   at = mkUifU(mce, Ity_I32, at, mkPCastTo(mce, Ity_I32, va2));
   at = mkPCastTo(mce, finalVty, at);
   return at;
}


/* Do the lazy propagation game from a null-terminated vector of
   atoms.  This is presumably the arguments to a helper call, so the
   IRCallee info is also supplied in order that we can know which
   arguments should be ignored (via the .mcx_mask field).
*/
static
IRAtom* mkLazyN ( MCEnv* mce,
                  IRAtom** exprvec, IRType finalVtype, IRCallee* cee )
{
   Int i;
   IRAtom* here;
   IRAtom* curr = definedOfType(Ity_I32);
   for (i = 0; exprvec[i]; i++) {
      tl_assert(i < 32);
      tl_assert(isOriginalAtom(mce, exprvec[i]));
      /* Only take notice of this arg if the callee's mc-exclusion
         mask does not say it is to be excluded. */
      if (cee->mcx_mask & (1<<i)) {
         /* the arg is to be excluded from definedness checking.  Do
            nothing. */
         if (0) VG_(printf)("excluding %s(%d)\n", cee->name, i);
      } else {
         /* calculate the arg's definedness, and pessimistically merge
            it in. */
         here = mkPCastTo( mce, Ity_I32, expr2vbits(mce, exprvec[i]) );
         curr = mkUifU32(mce, here, curr);
      }
   }
   return mkPCastTo(mce, finalVtype, curr );
}
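
/* So for a hypothetical ccall f(a, b, c) with mcx_mask == 0x2, only
   the V bits of a and c are PCast-ed to I32 and UifU-ed together;
   b's definedness is ignored, and the merged result is PCast-ed to
   the final type. */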


/*------------------------------------------------------------*/
/*--- Generating expensive sequences for exact carry-chain ---*/
/*--- propagation in add/sub and related operations.       ---*/
/*------------------------------------------------------------*/

static
__attribute__((unused))
IRAtom* expensiveAdd32 ( MCEnv* mce, IRAtom* qaa, IRAtom* qbb,
                                     IRAtom* aa,  IRAtom* bb )
{
   IRAtom *a_min, *b_min, *a_max, *b_max;
   IRType ty;
   IROp   opAND, opOR, opXOR, opNOT, opADD;

   tl_assert(isShadowAtom(mce,qaa));
   tl_assert(isShadowAtom(mce,qbb));
   tl_assert(isOriginalAtom(mce,aa));
   tl_assert(isOriginalAtom(mce,bb));
   tl_assert(sameKindedAtoms(qaa,aa));
   tl_assert(sameKindedAtoms(qbb,bb));

   ty    = Ity_I32;
   opAND = Iop_And32;
   opOR  = Iop_Or32;
   opXOR = Iop_Xor32;
   opNOT = Iop_Not32;
   opADD = Iop_Add32;

   // a_min = aa & ~qaa
   a_min = assignNew(mce,ty,
                     binop(opAND, aa,
                                  assignNew(mce,ty, unop(opNOT, qaa))));

   // b_min = bb & ~qbb
   b_min = assignNew(mce,ty,
                     binop(opAND, bb,
                                  assignNew(mce,ty, unop(opNOT, qbb))));

   // a_max = aa | qaa
   a_max = assignNew(mce,ty, binop(opOR, aa, qaa));

   // b_max = bb | qbb
   b_max = assignNew(mce,ty, binop(opOR, bb, qbb));

   // result = (qaa | qbb) | ((a_min + b_min) ^ (a_max + b_max))
   return
   assignNew(mce,ty,
      binop( opOR,
             assignNew(mce,ty, binop(opOR, qaa, qbb)),
             assignNew(mce,ty,
                binop(opXOR, assignNew(mce,ty, binop(opADD, a_min, b_min)),
                             assignNew(mce,ty, binop(opADD, a_max, b_max))
                )
             )
      )
   );
}
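
/* The idea: treating each undefined bit as unknown, a_min/a_max are
   the smallest/largest values aa could really be (undefined bits
   cleared resp. set), likewise for bb.  Any result bit that differs
   between (a_min + b_min) and (a_max + b_max) could have been
   flipped by a carry out of an undefined bit, so it is marked
   undefined; the directly-undefined bits qaa | qbb are then OR-ed
   in.  A 4-bit worked example: aa = 0b0101 with qaa = 0b0010 (bit 1
   unknown), bb = 0b0001 fully defined.  Then a_min + b_min = 0b0101
   + 0b0001 = 0b0110 and a_max + b_max = 0b0111 + 0b0001 = 0b1000;
   XOR = 0b1110, OR-ed with qaa gives 0b1110: the carry chain makes
   bits 3..1 undefined, while bit 0 stays defined. */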


/*------------------------------------------------------------*/
/*--- Helpers for dealing with vector primops.             ---*/
/*------------------------------------------------------------*/

/* Vector pessimisation -- pessimise within each lane individually. */
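/* e.g. Iop_CmpNEZ8x16 maps each 8-bit lane to 0x00 if the lane is
   all zeroes (fully defined) and to 0xFF otherwise, so any undefined
   bit poisons its whole lane but does not leak into neighbours. */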

static IRAtom* mkPCast8x16 ( MCEnv* mce, IRAtom* at )
{
   return assignNew(mce, Ity_V128, unop(Iop_CmpNEZ8x16, at));
}

static IRAtom* mkPCast16x8 ( MCEnv* mce, IRAtom* at )
{
   return assignNew(mce, Ity_V128, unop(Iop_CmpNEZ16x8, at));
}

static IRAtom* mkPCast32x4 ( MCEnv* mce, IRAtom* at )
{
   return assignNew(mce, Ity_V128, unop(Iop_CmpNEZ32x4, at));
}

static IRAtom* mkPCast64x2 ( MCEnv* mce, IRAtom* at )
{
   return assignNew(mce, Ity_V128, unop(Iop_CmpNEZ64x2, at));
}


/* Here's a simple scheme capable of handling ops derived from SSE1
   code, while only generating ops that can be efficiently implemented
   in SSE1. */

/* All-lanes versions are straightforward:

   binary32Fx4(x,y)   ==> PCast32x4(UifUV128(x#,y#))

   unary32Fx4(x)      ==> PCast32x4(x#)

   Lowest-lane-only versions are more complex:

   binary32F0x4(x,y)  ==> SetV128lo32(
                             x#,
                             PCast32(V128to32(UifUV128(x#,y#)))
                          )

   This is perhaps not so obvious.  In particular, it's faster to
   do a V128-bit UifU and then take the bottom 32 bits than the more
   obvious scheme of taking the bottom 32 bits of each operand
   and doing a 32-bit UifU.  Basically since UifU is fast and
   chopping lanes off vector values is slow.

   Finally:

   unary32F0x4(x)     ==> SetV128lo32(
                             x#,
                             PCast32(V128to32(x#))
                          )

   Where:

   PCast32(v#)   = 1Sto32(CmpNE32(v#,0))
   PCast32x4(v#) = CmpNEZ32x4(v#)
*/

static
IRAtom* binary32Fx4 ( MCEnv* mce, IRAtom* vatomX, IRAtom* vatomY )
{
   IRAtom* at;
   tl_assert(isShadowAtom(mce, vatomX));
   tl_assert(isShadowAtom(mce, vatomY));
   at = mkUifUV128(mce, vatomX, vatomY);
   at = assignNew(mce, Ity_V128, mkPCast32x4(mce, at));
   return at;
}

static
IRAtom* unary32Fx4 ( MCEnv* mce, IRAtom* vatomX )
{
   IRAtom* at;
   tl_assert(isShadowAtom(mce, vatomX));
   at = assignNew(mce, Ity_V128, mkPCast32x4(mce, vatomX));
   return at;
}

static
IRAtom* binary32F0x4 ( MCEnv* mce, IRAtom* vatomX, IRAtom* vatomY )
{
   IRAtom* at;
   tl_assert(isShadowAtom(mce, vatomX));
   tl_assert(isShadowAtom(mce, vatomY));
   at = mkUifUV128(mce, vatomX, vatomY);
   at = assignNew(mce, Ity_I32, unop(Iop_V128to32, at));
   at = mkPCastTo(mce, Ity_I32, at);
   at = assignNew(mce, Ity_V128, binop(Iop_SetV128lo32, vatomX, at));
   return at;
}

static
IRAtom* unary32F0x4 ( MCEnv* mce, IRAtom* vatomX )
{
   IRAtom* at;
   tl_assert(isShadowAtom(mce, vatomX));
   at = assignNew(mce, Ity_I32, unop(Iop_V128to32, vatomX));
   at = mkPCastTo(mce, Ity_I32, at);
   at = assignNew(mce, Ity_V128, binop(Iop_SetV128lo32, vatomX, at));
   return at;
}

/* --- ... and ... 64Fx2 versions of the same ... --- */

static
IRAtom* binary64Fx2 ( MCEnv* mce, IRAtom* vatomX, IRAtom* vatomY )
{
   IRAtom* at;
   tl_assert(isShadowAtom(mce, vatomX));
   tl_assert(isShadowAtom(mce, vatomY));
   at = mkUifUV128(mce, vatomX, vatomY);
   at = assignNew(mce, Ity_V128, mkPCast64x2(mce, at));
   return at;
}

static
IRAtom* unary64Fx2 ( MCEnv* mce, IRAtom* vatomX )
{
   IRAtom* at;
   tl_assert(isShadowAtom(mce, vatomX));
   at = assignNew(mce, Ity_V128, mkPCast64x2(mce, vatomX));
   return at;
}

static
IRAtom* binary64F0x2 ( MCEnv* mce, IRAtom* vatomX, IRAtom* vatomY )
{
   IRAtom* at;
   tl_assert(isShadowAtom(mce, vatomX));
   tl_assert(isShadowAtom(mce, vatomY));
   at = mkUifUV128(mce, vatomX, vatomY);
   at = assignNew(mce, Ity_I64, unop(Iop_V128to64, at));
   at = mkPCastTo(mce, Ity_I64, at);
   at = assignNew(mce, Ity_V128, binop(Iop_SetV128lo64, vatomX, at));
   return at;
}

static
IRAtom* unary64F0x2 ( MCEnv* mce, IRAtom* vatomX )
{
   IRAtom* at;
   tl_assert(isShadowAtom(mce, vatomX));
   at = assignNew(mce, Ity_I64, unop(Iop_V128to64, vatomX));
   at = mkPCastTo(mce, Ity_I64, at);
   at = assignNew(mce, Ity_V128, binop(Iop_SetV128lo64, vatomX, at));
   return at;
}

/* --- --- Vector saturated narrowing --- --- */

/* This is quite subtle.  What to do is simple:

   Let the original narrowing op be QNarrowW{S,U}xN.  Produce:

      the-narrowing-op( PCastWxN(vatom1), PCastWxN(vatom2))

   Why this is right is not so simple.  Consider a lane in the args,
   vatom1 or 2, doesn't matter.

   After the PCast, that lane is all 0s (defined) or all 1s
   (undefined).

   Both signed and unsigned saturating narrowing of all 0s produces
   all 0s, which is what we want.

   The all-1s case is more complex.  Unsigned narrowing interprets an
   all-1s input as the largest unsigned integer, and so produces all
   1s as a result since that is the largest unsigned value at the
   smaller width.

   Signed narrowing interprets all 1s as -1.  Fortunately, -1 narrows
   to -1, so we still wind up with all 1s at the smaller width.

   So: In short, pessimise the args, then apply the original narrowing
   op.
*/
static
IRAtom* vectorNarrowV128 ( MCEnv* mce, IROp narrow_op,
                           IRAtom* vatom1, IRAtom* vatom2 )
{
   IRAtom *at1, *at2, *at3;
   IRAtom* (*pcast)( MCEnv*, IRAtom* );
   switch (narrow_op) {
      case Iop_QNarrowBin32Sto16Sx8: pcast = mkPCast32x4; break;
      case Iop_QNarrowBin16Sto8Sx16: pcast = mkPCast16x8; break;
      case Iop_QNarrowBin16Sto8Ux16: pcast = mkPCast16x8; break;
      default: VG_(tool_panic)("vectorNarrowV128");
   }
   tl_assert(isShadowAtom(mce,vatom1));
   tl_assert(isShadowAtom(mce,vatom2));
   at1 = assignNew(mce, Ity_V128, pcast(mce, vatom1));
   at2 = assignNew(mce, Ity_V128, pcast(mce, vatom2));
   at3 = assignNew(mce, Ity_V128, binop(narrow_op, at1, at2));
   return at3;
}


/* --- --- Vector integer arithmetic --- --- */

/* Simple ... UifU the args and per-lane pessimise the results. */
static
IRAtom* binary8Ix16 ( MCEnv* mce, IRAtom* vatom1, IRAtom* vatom2 )
{
   IRAtom* at;
   at = mkUifUV128(mce, vatom1, vatom2);
   at = mkPCast8x16(mce, at);
   return at;
}

static
IRAtom* binary16Ix8 ( MCEnv* mce, IRAtom* vatom1, IRAtom* vatom2 )
{
   IRAtom* at;
   at = mkUifUV128(mce, vatom1, vatom2);
   at = mkPCast16x8(mce, at);
   return at;
}

static
IRAtom* binary32Ix4 ( MCEnv* mce, IRAtom* vatom1, IRAtom* vatom2 )
{
   IRAtom* at;
   at = mkUifUV128(mce, vatom1, vatom2);
   at = mkPCast32x4(mce, at);
   return at;
}

static
IRAtom* binary64Ix2 ( MCEnv* mce, IRAtom* vatom1, IRAtom* vatom2 )
{
   IRAtom* at;
   at = mkUifUV128(mce, vatom1, vatom2);
   at = mkPCast64x2(mce, at);
   return at;
}
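
/* Worked example (editor's sketch) for the four helpers above, on a
   single 8-bit lane, where a 1 shadow bit means "undefined":

      vatom1 lane = 0x01   (one undefined bit)
      vatom2 lane = 0x00   (fully defined)

   mkUifUV128 ORs the shadows, giving 0x01, and the per-lane PCast
   smears that to 0xFF: any undefined input bit makes the whole
   result lane undefined.  Pessimistic, but always safe for ops such
   as Add8x16, where carries can spread one bad bit across the lane. */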


/*------------------------------------------------------------*/
/*--- Generate shadow values from all kinds of IRExprs.    ---*/
/*------------------------------------------------------------*/

static
IRAtom* expr2vbits_Binop ( MCEnv* mce,
                           IROp op,
                           IRAtom* atom1, IRAtom* atom2 )
{
   IRType  and_or_ty;
   IRAtom* (*uifu)    (MCEnv*, IRAtom*, IRAtom*);
   IRAtom* (*difd)    (MCEnv*, IRAtom*, IRAtom*);
   IRAtom* (*improve) (MCEnv*, IRAtom*, IRAtom*);

   IRAtom* vatom1 = expr2vbits( mce, atom1 );
   IRAtom* vatom2 = expr2vbits( mce, atom2 );

   tl_assert(isOriginalAtom(mce,atom1));
   tl_assert(isOriginalAtom(mce,atom2));
   tl_assert(isShadowAtom(mce,vatom1));
   tl_assert(isShadowAtom(mce,vatom2));
   tl_assert(sameKindedAtoms(atom1,vatom1));
   tl_assert(sameKindedAtoms(atom2,vatom2));
   switch (op) {

      /* V128-bit SIMD (SSE2-esque) */

      case Iop_ShrN16x8:
      case Iop_ShrN32x4:
      case Iop_ShrN64x2:
      case Iop_SarN16x8:
      case Iop_SarN32x4:
      case Iop_ShlN16x8:
      case Iop_ShlN32x4:
      case Iop_ShlN64x2:
         /* Same scheme as with all other shifts. */
         complainIfUndefined(mce, atom2);
         return assignNew(mce, Ity_V128, binop(op, vatom1, atom2));

      case Iop_QSub8Ux16:
      case Iop_QSub8Sx16:
      case Iop_Sub8x16:
      case Iop_Min8Ux16:
      case Iop_Max8Ux16:
      case Iop_CmpGT8Sx16:
      case Iop_CmpEQ8x16:
      case Iop_Avg8Ux16:
      case Iop_QAdd8Ux16:
      case Iop_QAdd8Sx16:
      case Iop_Add8x16:
         return binary8Ix16(mce, vatom1, vatom2);

      case Iop_QSub16Ux8:
      case Iop_QSub16Sx8:
      case Iop_Sub16x8:
      case Iop_Mul16x8:
      case Iop_MulHi16Sx8:
      case Iop_MulHi16Ux8:
      case Iop_Min16Sx8:
      case Iop_Max16Sx8:
      case Iop_CmpGT16Sx8:
      case Iop_CmpEQ16x8:
      case Iop_Avg16Ux8:
      case Iop_QAdd16Ux8:
      case Iop_QAdd16Sx8:
      case Iop_Add16x8:
         return binary16Ix8(mce, vatom1, vatom2);

      case Iop_Sub32x4:
      case Iop_QSub32Sx4:
      case Iop_QSub32Ux4:
      case Iop_CmpGT32Sx4:
      case Iop_CmpEQ32x4:
      case Iop_Add32x4:
      case Iop_QAdd32Ux4:
      case Iop_QAdd32Sx4:
         return binary32Ix4(mce, vatom1, vatom2);

      case Iop_Sub64x2:
      case Iop_QSub64Ux2:
      case Iop_QSub64Sx2:
      case Iop_Add64x2:
      case Iop_QAdd64Ux2:
      case Iop_QAdd64Sx2:
         return binary64Ix2(mce, vatom1, vatom2);

      case Iop_QNarrowBin32Sto16Sx8:
      case Iop_QNarrowBin16Sto8Sx16:
      case Iop_QNarrowBin16Sto8Ux16:
         return vectorNarrowV128(mce, op, vatom1, vatom2);

      case Iop_Sub64Fx2:
      case Iop_Mul64Fx2:
      case Iop_Min64Fx2:
      case Iop_Max64Fx2:
      case Iop_Div64Fx2:
      case Iop_CmpLT64Fx2:
      case Iop_CmpLE64Fx2:
      case Iop_CmpEQ64Fx2:
      case Iop_Add64Fx2:
         return binary64Fx2(mce, vatom1, vatom2);

      case Iop_Sub64F0x2:
      case Iop_Mul64F0x2:
      case Iop_Min64F0x2:
      case Iop_Max64F0x2:
      case Iop_Div64F0x2:
      case Iop_CmpLT64F0x2:
      case Iop_CmpLE64F0x2:
      case Iop_CmpEQ64F0x2:
      case Iop_Add64F0x2:
         return binary64F0x2(mce, vatom1, vatom2);

      /* V128-bit SIMD (SSE1-esque) */

      case Iop_Sub32Fx4:
      case Iop_Mul32Fx4:
      case Iop_Min32Fx4:
      case Iop_Max32Fx4:
      case Iop_Div32Fx4:
      case Iop_CmpLT32Fx4:
      case Iop_CmpLE32Fx4:
      case Iop_CmpEQ32Fx4:
      case Iop_Add32Fx4:
         return binary32Fx4(mce, vatom1, vatom2);

      case Iop_Sub32F0x4:
      case Iop_Mul32F0x4:
      case Iop_Min32F0x4:
      case Iop_Max32F0x4:
      case Iop_Div32F0x4:
      case Iop_CmpLT32F0x4:
      case Iop_CmpLE32F0x4:
      case Iop_CmpEQ32F0x4:
      case Iop_Add32F0x4:
         return binary32F0x4(mce, vatom1, vatom2);

      /* V128-bit data-steering */
      case Iop_SetV128lo32:
      case Iop_SetV128lo64:
      case Iop_64HLtoV128:
      case Iop_InterleaveLO64x2:
      case Iop_InterleaveLO32x4:
      case Iop_InterleaveLO16x8:
      case Iop_InterleaveLO8x16:
      case Iop_InterleaveHI64x2:
      case Iop_InterleaveHI32x4:
      case Iop_InterleaveHI16x8:
      case Iop_InterleaveHI8x16:
         return assignNew(mce, Ity_V128, binop(op, vatom1, vatom2));

      /* Scalar floating point */

      // case Iop_RoundF64:
      case Iop_F64toI64S:
      case Iop_I64StoF64:
         /* First arg is I32 (rounding mode), second is F64 or I64
            (data). */
         return mkLazy2(mce, Ity_I64, vatom1, vatom2);

      case Iop_PRemC3210F64: case Iop_PRem1C3210F64:
         /* Takes two F64 args. */
      case Iop_F64toI32S:
      case Iop_F64toF32:
         /* First arg is I32 (rounding mode), second is F64 (data). */
         return mkLazy2(mce, Ity_I32, vatom1, vatom2);

      case Iop_F64toI16S:
         /* First arg is I32 (rounding mode), second is F64 (data). */
         return mkLazy2(mce, Ity_I16, vatom1, vatom2);

      case Iop_ScaleF64:
      case Iop_Yl2xF64:
      case Iop_Yl2xp1F64:
      case Iop_PRemF64:
      case Iop_AtanF64:
      case Iop_AddF64:
      case Iop_DivF64:
      case Iop_SubF64:
      case Iop_MulF64:
         return mkLazy2(mce, Ity_I64, vatom1, vatom2);

      case Iop_CmpF64:
         return mkLazy2(mce, Ity_I32, vatom1, vatom2);

      /* non-FP after here */

      case Iop_DivModU64to32:
      case Iop_DivModS64to32:
         return mkLazy2(mce, Ity_I64, vatom1, vatom2);

      case Iop_16HLto32:
         return assignNew(mce, Ity_I32, binop(op, vatom1, vatom2));
      case Iop_32HLto64:
         return assignNew(mce, Ity_I64, binop(op, vatom1, vatom2));

      case Iop_MullS32:
      case Iop_MullU32: {
         IRAtom* vLo32 = mkLeft32(mce, mkUifU32(mce, vatom1,vatom2));
         IRAtom* vHi32 = mkPCastTo(mce, Ity_I32, vLo32);
         return assignNew(mce, Ity_I64, binop(Iop_32HLto64, vHi32, vLo32));
      }

      case Iop_MullS16:
      case Iop_MullU16: {
         IRAtom* vLo16 = mkLeft16(mce, mkUifU16(mce, vatom1,vatom2));
         IRAtom* vHi16 = mkPCastTo(mce, Ity_I16, vLo16);
         return assignNew(mce, Ity_I32, binop(Iop_16HLto32, vHi16, vLo16));
      }

      case Iop_MullS8:
      case Iop_MullU8: {
         IRAtom* vLo8 = mkLeft8(mce, mkUifU8(mce, vatom1,vatom2));
         IRAtom* vHi8 = mkPCastTo(mce, Ity_I8, vLo8);
         return assignNew(mce, Ity_I16, binop(Iop_8HLto16, vHi8, vLo8));
      }
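
      /* Note (editor's sketch) on the three widening multiplies
         above, assuming mkLeftN computes the usual x | -x smear:
         if the UifU'd 8-bit shadow is 0x80 (top bit undefined),
         mkLeft8 leaves it as 0x80, and the high half becomes
         PCast8(0x80) = 0xFF, giving a 16-bit result shadow of
         0xFF80 -- everything at or above the undefined bit is
         treated as undefined. */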

      case Iop_Add32:
#        if 0
         return expensiveAdd32(mce, vatom1,vatom2, atom1,atom2);
#        endif
      case Iop_Sub32:
      case Iop_Mul32:
         return mkLeft32(mce, mkUifU32(mce, vatom1,vatom2));

      case Iop_Mul16:
      case Iop_Add16:
      case Iop_Sub16:
         return mkLeft16(mce, mkUifU16(mce, vatom1,vatom2));

      case Iop_Sub8:
      case Iop_Add8:
         return mkLeft8(mce, mkUifU8(mce, vatom1,vatom2));

      case Iop_CmpLE32S: case Iop_CmpLE32U:
      case Iop_CmpLT32U: case Iop_CmpLT32S:
      case Iop_CmpEQ32: case Iop_CmpNE32:
         return mkPCastTo(mce, Ity_I1, mkUifU32(mce, vatom1,vatom2));

      case Iop_CmpEQ16: case Iop_CmpNE16:
         return mkPCastTo(mce, Ity_I1, mkUifU16(mce, vatom1,vatom2));

      case Iop_CmpEQ8: case Iop_CmpNE8:
         return mkPCastTo(mce, Ity_I1, mkUifU8(mce, vatom1,vatom2));

      case Iop_Shl32: case Iop_Shr32: case Iop_Sar32:
         /* Complain if the shift amount is undefined.  Then simply
            shift the first arg's V bits by the real shift amount. */
         complainIfUndefined(mce, atom2);
         return assignNew(mce, Ity_I32, binop(op, vatom1, atom2));

      case Iop_Shl16: case Iop_Shr16: case Iop_Sar16:
         /* Same scheme as with 32-bit shifts. */
         complainIfUndefined(mce, atom2);
         return assignNew(mce, Ity_I16, binop(op, vatom1, atom2));

      case Iop_Shl8: case Iop_Shr8:
         /* Same scheme as with 32-bit shifts. */
         complainIfUndefined(mce, atom2);
         return assignNew(mce, Ity_I8, binop(op, vatom1, atom2));

      case Iop_Shl64: case Iop_Shr64:
         /* Same scheme as with 32-bit shifts. */
         complainIfUndefined(mce, atom2);
         return assignNew(mce, Ity_I64, binop(op, vatom1, atom2));

      case Iop_AndV128:
         uifu = mkUifUV128; difd = mkDifDV128;
         and_or_ty = Ity_V128; improve = mkImproveANDV128; goto do_And_Or;
      case Iop_And64:
         uifu = mkUifU64; difd = mkDifD64;
         and_or_ty = Ity_I64; improve = mkImproveAND64; goto do_And_Or;
      case Iop_And32:
         uifu = mkUifU32; difd = mkDifD32;
         and_or_ty = Ity_I32; improve = mkImproveAND32; goto do_And_Or;
      case Iop_And16:
         uifu = mkUifU16; difd = mkDifD16;
         and_or_ty = Ity_I16; improve = mkImproveAND16; goto do_And_Or;
      case Iop_And8:
         uifu = mkUifU8; difd = mkDifD8;
         and_or_ty = Ity_I8; improve = mkImproveAND8; goto do_And_Or;

      case Iop_OrV128:
         uifu = mkUifUV128; difd = mkDifDV128;
         and_or_ty = Ity_V128; improve = mkImproveORV128; goto do_And_Or;
      case Iop_Or64:
         uifu = mkUifU64; difd = mkDifD64;
         and_or_ty = Ity_I64; improve = mkImproveOR64; goto do_And_Or;
      case Iop_Or32:
         uifu = mkUifU32; difd = mkDifD32;
         and_or_ty = Ity_I32; improve = mkImproveOR32; goto do_And_Or;
      case Iop_Or16:
         uifu = mkUifU16; difd = mkDifD16;
         and_or_ty = Ity_I16; improve = mkImproveOR16; goto do_And_Or;
      case Iop_Or8:
         uifu = mkUifU8; difd = mkDifD8;
         and_or_ty = Ity_I8; improve = mkImproveOR8; goto do_And_Or;

      do_And_Or:
         return
         assignNew(
            mce,
            and_or_ty,
            difd(mce, uifu(mce, vatom1, vatom2),
                      difd(mce, improve(mce, atom1, vatom1),
                                improve(mce, atom2, vatom2) ) ) );
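
      /* Worked example (editor's sketch) of do_And_Or for And8 at a
         single bit position, where 'a' is the actual bit and 'v' its
         shadow (1 = undefined).  For AND, improve(a,v) = a | v: a
         defined 0 forces the result regardless of the other arg.

            a1=0 v1=0 (defined 0), v2=1 (other arg undefined):
               uifu = v1|v2 = 1;  improve1 = a1|v1 = 0
               shadow = 1 & (0 & improve2) = 0  -> defined, correct,
               since 0 AND anything is 0.

            a1=1 v1=0 (defined 1), v2=1:
               improve1 = 1; improve2 = 1; shadow = 1 -> undefined,
               as it must be. */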

      case Iop_Xor8:
         return mkUifU8(mce, vatom1, vatom2);
      case Iop_Xor16:
         return mkUifU16(mce, vatom1, vatom2);
      case Iop_Xor32:
         return mkUifU32(mce, vatom1, vatom2);
      case Iop_Xor64:
         return mkUifU64(mce, vatom1, vatom2);
      case Iop_XorV128:
         return mkUifUV128(mce, vatom1, vatom2);

      default:
         ppIROp(op);
         VG_(tool_panic)("memcheck:expr2vbits_Binop");
   }
}


static
IRExpr* expr2vbits_Unop ( MCEnv* mce, IROp op, IRAtom* atom )
{
   IRAtom* vatom = expr2vbits( mce, atom );
   tl_assert(isOriginalAtom(mce,atom));
   switch (op) {

      case Iop_Sqrt64Fx2:
         return unary64Fx2(mce, vatom);

      case Iop_Sqrt64F0x2:
         return unary64F0x2(mce, vatom);

      case Iop_Sqrt32Fx4:
      case Iop_RSqrt32Fx4:
      case Iop_Recip32Fx4:
         return unary32Fx4(mce, vatom);

      case Iop_Sqrt32F0x4:
      case Iop_RSqrt32F0x4:
      case Iop_Recip32F0x4:
         return unary32F0x4(mce, vatom);

      case Iop_32UtoV128:
      case Iop_64UtoV128:
         return assignNew(mce, Ity_V128, unop(op, vatom));

      case Iop_F32toF64:
      case Iop_I32StoF64:
      case Iop_NegF64:
      case Iop_SinF64:
      case Iop_CosF64:
      case Iop_TanF64:
      case Iop_SqrtF64:
      case Iop_AbsF64:
      case Iop_2xm1F64:
         return mkPCastTo(mce, Ity_I64, vatom);

      case Iop_Clz32:
      case Iop_Ctz32:
         return mkPCastTo(mce, Ity_I32, vatom);

      case Iop_32Sto64:
      case Iop_32Uto64:
      case Iop_V128to64:
      case Iop_V128HIto64:
         return assignNew(mce, Ity_I64, unop(op, vatom));

      case Iop_64to32:
      case Iop_64HIto32:
      case Iop_1Uto32:
      case Iop_8Uto32:
      case Iop_16Uto32:
      case Iop_16Sto32:
      case Iop_8Sto32:
         return assignNew(mce, Ity_I32, unop(op, vatom));

      case Iop_8Sto16:
      case Iop_8Uto16:
      case Iop_32to16:
      case Iop_32HIto16:
         return assignNew(mce, Ity_I16, unop(op, vatom));

      case Iop_1Uto8:
      case Iop_16to8:
      case Iop_32to8:
         return assignNew(mce, Ity_I8, unop(op, vatom));

      case Iop_32to1:
         return assignNew(mce, Ity_I1, unop(Iop_32to1, vatom));

      case Iop_ReinterpF64asI64:
      case Iop_ReinterpI64asF64:
      case Iop_ReinterpI32asF32:
      case Iop_NotV128:
      case Iop_Not64:
      case Iop_Not32:
      case Iop_Not16:
      case Iop_Not8:
      case Iop_Not1:
         return vatom;

      default:
         ppIROp(op);
         VG_(tool_panic)("memcheck:expr2vbits_Unop");
   }
}


/* Worker function; do not call directly. */
static
IRAtom* expr2vbits_LDle_WRK ( MCEnv* mce, IRType ty, IRAtom* addr, UInt bias )
{
   void*    helper;
   HChar*   hname;
   IRDirty* di;
   IRTemp   datavbits;
   IRAtom*  addrAct;

   tl_assert(isOriginalAtom(mce,addr));

   /* First, emit a definedness test for the address.  This also sets
      the address (shadow) to 'defined' following the test. */
   complainIfUndefined( mce, addr );

   /* Now cook up a call to the relevant helper function, to read the
      data V bits from shadow memory. */
   ty = shadowType(ty);
   switch (ty) {
      case Ity_I64: helper = &MC_(helperc_LOADV8);
                    hname = "MC_(helperc_LOADV8)";
                    break;
      case Ity_I32: helper = &MC_(helperc_LOADV4);
                    hname = "MC_(helperc_LOADV4)";
                    break;
      case Ity_I16: helper = &MC_(helperc_LOADV2);
                    hname = "MC_(helperc_LOADV2)";
                    break;
      case Ity_I8:  helper = &MC_(helperc_LOADV1);
                    hname = "MC_(helperc_LOADV1)";
                    break;
      default:      ppIRType(ty);
                    VG_(tool_panic)("memcheck:do_shadow_LDle");
   }

   /* Generate the actual address into addrAct. */
   if (bias == 0) {
      addrAct = addr;
   } else {
      IROp    mkAdd;
      IRAtom* eBias;
      IRType  tyAddr  = mce->hWordTy;
      tl_assert( tyAddr == Ity_I32 || tyAddr == Ity_I64 );
      mkAdd   = tyAddr==Ity_I32 ? Iop_Add32 : Iop_Add64;
      eBias   = tyAddr==Ity_I32 ? mkU32(bias) : mkU64(bias);
      addrAct = assignNew(mce, tyAddr, binop(mkAdd, addr, eBias) );
   }

   /* We need to have a place to park the V bits we're just about to
      read. */
   datavbits = newIRTemp(mce->bb->tyenv, ty);
   di = unsafeIRDirty_1_N( datavbits,
                           1/*regparms*/, hname, helper,
                           mkIRExprVec_1( addrAct ));
   setHelperAnns( mce, di );
   stmt( mce->bb, IRStmt_Dirty(di) );

   return mkexpr(datavbits);
}


static
IRAtom* expr2vbits_LDle ( MCEnv* mce, IRType ty, IRAtom* addr, UInt bias )
{
   IRAtom *v64hi, *v64lo;
   switch (shadowType(ty)) {
      case Ity_I8:
      case Ity_I16:
      case Ity_I32:
      case Ity_I64:
         return expr2vbits_LDle_WRK(mce, ty, addr, bias);
      case Ity_V128:
         v64lo = expr2vbits_LDle_WRK(mce, Ity_I64, addr, bias);
         v64hi = expr2vbits_LDle_WRK(mce, Ity_I64, addr, bias+8);
         return assignNew( mce,
                           Ity_V128,
                           binop(Iop_64HLtoV128, v64hi, v64lo));
      default:
         VG_(tool_panic)("expr2vbits_LDle");
   }
}


static
IRAtom* expr2vbits_Mux0X ( MCEnv* mce,
                           IRAtom* cond, IRAtom* expr0, IRAtom* exprX )
{
   IRAtom *vbitsC, *vbits0, *vbitsX;
   IRType ty;
   /* Given Mux0X(cond,expr0,exprX), generate
         Mux0X(cond,expr0#,exprX#) `UifU` PCast(cond#)
      That is, steer the V bits like the originals, but trash the
      result if the steering value is undefined.  This gives
      lazy propagation. */
   tl_assert(isOriginalAtom(mce, cond));
   tl_assert(isOriginalAtom(mce, expr0));
   tl_assert(isOriginalAtom(mce, exprX));

   vbitsC = expr2vbits(mce, cond);
   vbits0 = expr2vbits(mce, expr0);
   vbitsX = expr2vbits(mce, exprX);
   ty = typeOfIRExpr(mce->bb->tyenv, vbits0);

   return
      mkUifU(mce, ty, assignNew(mce, ty, IRExpr_Mux0X(cond, vbits0, vbitsX)),
                      mkPCastTo(mce, ty, vbitsC) );
}
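
/* Worked example (editor's sketch): if cond is fully defined,
   PCast(cond#) is all 0s, and the UifU leaves the Mux-selected
   shadow unchanged.  If cond has any undefined bit, PCast(cond#) is
   all 1s and the whole result is forced undefined, whichever arm
   was selected. */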

/* --------- This is the main expression-handling function. --------- */

static
IRExpr* expr2vbits ( MCEnv* mce, IRExpr* e )
{
   switch (e->tag) {

      case Iex_Get:
         return shadow_GET( mce, e->Iex.Get.offset, e->Iex.Get.ty );

      case Iex_GetI:
         return shadow_GETI( mce, e->Iex.GetI.descr,
                                  e->Iex.GetI.ix, e->Iex.GetI.bias );

      case Iex_RdTmp:
         return IRExpr_RdTmp( findShadowTmp(mce, e->Iex.RdTmp.tmp) );

      case Iex_Const:
         return definedOfType(shadowType(typeOfIRExpr(mce->bb->tyenv, e)));

      case Iex_Binop:
         return expr2vbits_Binop(
                   mce,
                   e->Iex.Binop.op,
                   e->Iex.Binop.arg1, e->Iex.Binop.arg2
                );

      case Iex_Unop:
         return expr2vbits_Unop( mce, e->Iex.Unop.op, e->Iex.Unop.arg );

      case Iex_Load:
         return expr2vbits_LDle( mce, e->Iex.Load.ty,
                                      e->Iex.Load.addr, 0/*addr bias*/ );

      case Iex_CCall:
         return mkLazyN( mce, e->Iex.CCall.args,
                              e->Iex.CCall.retty,
                              e->Iex.CCall.cee );

      case Iex_Mux0X:
         return expr2vbits_Mux0X( mce, e->Iex.Mux0X.cond, e->Iex.Mux0X.expr0,
                                       e->Iex.Mux0X.exprX);

      default:
         VG_(printf)("\n");
         ppIRExpr(e);
         VG_(printf)("\n");
         VG_(tool_panic)("memcheck: expr2vbits");
   }
}

/*------------------------------------------------------------*/
/*--- Generate shadow stmts from all kinds of IRStmts.     ---*/
/*------------------------------------------------------------*/

/* Widen a value to the host word size. */

static
IRExpr* zwidenToHostWord ( MCEnv* mce, IRAtom* vatom )
{
   IRType ty, tyH;

   /* vatom is vbits-value and as such can only have a shadow type. */
   tl_assert(isShadowAtom(mce,vatom));

   ty  = typeOfIRExpr(mce->bb->tyenv, vatom);
   tyH = mce->hWordTy;

   if (tyH == Ity_I32) {
      switch (ty) {
         case Ity_I32: return vatom;
         case Ity_I16: return assignNew(mce, tyH, unop(Iop_16Uto32, vatom));
         case Ity_I8:  return assignNew(mce, tyH, unop(Iop_8Uto32, vatom));
         default:      goto unhandled;
      }
   } else {
      goto unhandled;
   }
  unhandled:
   VG_(printf)("\nty = "); ppIRType(ty); VG_(printf)("\n");
   VG_(tool_panic)("zwidenToHostWord");
}


/* Generate a shadow store.  addr is always the original address atom.
   You can pass in either originals or V-bits for the data atom, but
   obviously not both.  */

static
void do_shadow_STle ( MCEnv* mce,
                      IRAtom* addr, UInt bias,
                      IRAtom* data, IRAtom* vdata )
{
   IROp     mkAdd;
   IRType   ty, tyAddr;
   IRDirty  *di, *diLo64, *diHi64;
   IRAtom   *addrAct, *addrLo64, *addrHi64;
   IRAtom   *vdataLo64, *vdataHi64;
   IRAtom   *eBias, *eBias0, *eBias8;
   void*    helper = NULL;
   HChar*   hname = NULL;

   tyAddr = mce->hWordTy;
   mkAdd  = tyAddr==Ity_I32 ? Iop_Add32 : Iop_Add64;
   tl_assert( tyAddr == Ity_I32 || tyAddr == Ity_I64 );

   di = diLo64 = diHi64 = NULL;
   eBias = eBias0 = eBias8 = NULL;
   addrAct = addrLo64 = addrHi64 = NULL;
   vdataLo64 = vdataHi64 = NULL;

   if (data) {
      tl_assert(!vdata);
      tl_assert(isOriginalAtom(mce, data));
      tl_assert(bias == 0);
      vdata = expr2vbits( mce, data );
   } else {
      tl_assert(vdata);
   }

   tl_assert(isOriginalAtom(mce,addr));
   tl_assert(isShadowAtom(mce,vdata));

   ty = typeOfIRExpr(mce->bb->tyenv, vdata);

   /* First, emit a definedness test for the address.  This also sets
      the address (shadow) to 'defined' following the test. */
   complainIfUndefined( mce, addr );

   /* Now decide which helper function to call to write the data V
      bits into shadow memory. */
   switch (ty) {
      case Ity_V128: /* we'll use the helper twice */
      case Ity_I64: helper = &MC_(helperc_STOREV8);
                    hname = "MC_(helperc_STOREV8)";
                    break;
      case Ity_I32: helper = &MC_(helperc_STOREV4);
                    hname = "MC_(helperc_STOREV4)";
                    break;
      case Ity_I16: helper = &MC_(helperc_STOREV2);
                    hname = "MC_(helperc_STOREV2)";
                    break;
      case Ity_I8:  helper = &MC_(helperc_STOREV1);
                    hname = "MC_(helperc_STOREV1)";
                    break;
      default:      VG_(tool_panic)("memcheck:do_shadow_STle");
   }

   if (ty == Ity_V128) {

      /* V128-bit case */
      /* See comment in next clause re 64-bit regparms */
      eBias0    = tyAddr==Ity_I32 ? mkU32(bias)   : mkU64(bias);
      addrLo64  = assignNew(mce, tyAddr, binop(mkAdd, addr, eBias0) );
      vdataLo64 = assignNew(mce, Ity_I64, unop(Iop_V128to64, vdata));
      diLo64    = unsafeIRDirty_0_N(
                     1/*regparms*/, hname, helper,
                     mkIRExprVec_2( addrLo64, vdataLo64 ));

      eBias8    = tyAddr==Ity_I32 ? mkU32(bias+8) : mkU64(bias+8);
      addrHi64  = assignNew(mce, tyAddr, binop(mkAdd, addr, eBias8) );
      vdataHi64 = assignNew(mce, Ity_I64, unop(Iop_V128HIto64, vdata));
      diHi64    = unsafeIRDirty_0_N(
                     1/*regparms*/, hname, helper,
                     mkIRExprVec_2( addrHi64, vdataHi64 ));

      setHelperAnns( mce, diLo64 );
      setHelperAnns( mce, diHi64 );
      stmt( mce->bb, IRStmt_Dirty(diLo64) );
      stmt( mce->bb, IRStmt_Dirty(diHi64) );

   } else {

      /* 8/16/32/64-bit cases */
      /* Generate the actual address into addrAct. */
      if (bias == 0) {
         addrAct = addr;
      } else {
         eBias   = tyAddr==Ity_I32 ? mkU32(bias) : mkU64(bias);
         addrAct = assignNew(mce, tyAddr, binop(mkAdd, addr, eBias) );
      }

      if (ty == Ity_I64) {
         /* We can't do this with regparm 2 on 32-bit platforms, since
            the back ends aren't clever enough to handle 64-bit
            regparm args.  Therefore be different. */
         di = unsafeIRDirty_0_N(
                 1/*regparms*/, hname, helper,
                 mkIRExprVec_2( addrAct, vdata ));
      } else {
         di = unsafeIRDirty_0_N(
                 2/*regparms*/, hname, helper,
                 mkIRExprVec_2( addrAct,
                                zwidenToHostWord( mce, vdata )));
      }
      setHelperAnns( mce, di );
      stmt( mce->bb, IRStmt_Dirty(di) );
   }

}
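
/* Usage note (editor's addition): both entry styles appear later in
   this file.  Instrumenting an Ist_Store passes the original data
   atom and a NULL shadow; do_shadow_Dirty instead passes a NULL
   original plus a V-bits atom it has already computed. */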


/* Do lazy pessimistic propagation through a dirty helper call, by
   looking at the annotations on it.  This is the most complex part of
   Memcheck. */
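
/* Sketch of the scheme below (editor's addition): every input --
   each non-masked argument, each piece of guest state read, each
   chunk of memory read -- is PCast-ed to I32 and UifU-ed into one
   running summary, 'curr'.  If all inputs are fully defined, curr
   stays all 0s; one undefined input bit anywhere makes it all 1s.
   'curr' is then PCast-ed back out to every output: the destination
   temporary, written guest state, and written memory. */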

static IRType szToITy ( Int n )
{
   switch (n) {
      case 1: return Ity_I8;
      case 2: return Ity_I16;
      case 4: return Ity_I32;
      case 8: return Ity_I64;
      default: VG_(tool_panic)("szToITy(memcheck)");
   }
}

static
void do_shadow_Dirty ( MCEnv* mce, IRDirty* d )
{
   Int     i, n, offset, toDo, gSz, gOff;
   IRAtom  *src, *here, *curr;
   IRType  tyAddr, tySrc, tyDst;
   IRTemp  dst;

   /* First check the guard. */
   complainIfUndefined(mce, d->guard);

   /* Now round up all inputs and PCast over them. */
   curr = definedOfType(Ity_I32);

   /* Inputs: unmasked args */
   for (i = 0; d->args[i]; i++) {
      if (d->cee->mcx_mask & (1<<i)) {
         /* ignore this arg */
      } else {
         here = mkPCastTo( mce, Ity_I32, expr2vbits(mce, d->args[i]) );
         curr = mkUifU32(mce, here, curr);
      }
   }

   /* Inputs: guest state that we read. */
   for (i = 0; i < d->nFxState; i++) {
      tl_assert(d->fxState[i].fx != Ifx_None);
      if (d->fxState[i].fx == Ifx_Write)
         continue;

      /* Ignore any sections marked as 'always defined'. */
      if (isAlwaysDefd(mce, d->fxState[i].offset, d->fxState[i].size )) {
         if (0)
            VG_(printf)("memcheck: Dirty gst: ignored off %d, sz %d\n",
                        d->fxState[i].offset, d->fxState[i].size );
         continue;
      }

      /* This state element is read or modified.  So we need to
         consider it.  If larger than 8 bytes, deal with it in 8-byte
         chunks. */
      gSz  = d->fxState[i].size;
      gOff = d->fxState[i].offset;
      tl_assert(gSz > 0);
      while (True) {
         if (gSz == 0) break;
         n = gSz <= 8 ? gSz : 8;
         /* update 'curr' with UifU of the state slice
            gOff .. gOff+n-1 */
         tySrc = szToITy( n );
         src   = assignNew( mce, tySrc,
                            shadow_GET(mce, gOff, tySrc ) );
         here = mkPCastTo( mce, Ity_I32, src );
         curr = mkUifU32(mce, here, curr);
         gSz -= n;
         gOff += n;
      }

   }

   /* Inputs: memory.  First set up some info needed regardless of
      whether we're doing reads or writes. */
   tyAddr = Ity_INVALID;

   if (d->mFx != Ifx_None) {
      /* Because we may do multiple shadow loads/stores from the same
         base address, it's best to do a single test of its
         definedness right now.  Post-instrumentation optimisation
         should remove all but this test. */
      tl_assert(d->mAddr);
      complainIfUndefined(mce, d->mAddr);

      tyAddr = typeOfIRExpr(mce->bb->tyenv, d->mAddr);
      tl_assert(tyAddr == Ity_I32 || tyAddr == Ity_I64);
      tl_assert(tyAddr == mce->hWordTy); /* not really right */
   }

   /* Deal with memory inputs (reads or modifies) */
   if (d->mFx == Ifx_Read || d->mFx == Ifx_Modify) {
      offset = 0;
      toDo   = d->mSize;
      /* chew off 32-bit chunks */
      while (toDo >= 4) {
         here = mkPCastTo(
                   mce, Ity_I32,
                   expr2vbits_LDle ( mce, Ity_I32,
                                     d->mAddr, d->mSize - toDo )
                );
         curr = mkUifU32(mce, here, curr);
         toDo -= 4;
      }
      /* chew off 16-bit chunks */
      while (toDo >= 2) {
         here = mkPCastTo(
                   mce, Ity_I32,
                   expr2vbits_LDle ( mce, Ity_I16,
                                     d->mAddr, d->mSize - toDo )
                );
         curr = mkUifU32(mce, here, curr);
         toDo -= 2;
      }
      tl_assert(toDo == 0); /* also need to handle 1-byte excess */
   }

   /* Whew!  So curr is a 32-bit V-value summarising pessimistically
      all the inputs to the helper.  Now we need to re-distribute the
      results to all destinations. */

   /* Outputs: the destination temporary, if there is one. */
   if (d->tmp != IRTemp_INVALID) {
      dst   = findShadowTmp(mce, d->tmp);
      tyDst = typeOfIRTemp(mce->bb->tyenv, d->tmp);
      assign( mce->bb, dst, mkPCastTo( mce, tyDst, curr) );
   }

   /* Outputs: guest state that we write or modify. */
   for (i = 0; i < d->nFxState; i++) {
      tl_assert(d->fxState[i].fx != Ifx_None);
      if (d->fxState[i].fx == Ifx_Read)
         continue;
      /* Ignore any sections marked as 'always defined'. */
      if (isAlwaysDefd(mce, d->fxState[i].offset, d->fxState[i].size ))
         continue;
      /* This state element is written or modified.  So we need to
         consider it.  If larger than 8 bytes, deal with it in 8-byte
         chunks. */
      gSz  = d->fxState[i].size;
      gOff = d->fxState[i].offset;
      tl_assert(gSz > 0);
      while (True) {
         if (gSz == 0) break;
         n = gSz <= 8 ? gSz : 8;
         /* Write suitably-casted 'curr' to the state slice
            gOff .. gOff+n-1 */
         tyDst = szToITy( n );
         do_shadow_PUT( mce, gOff,
                             NULL, /* original atom */
                             mkPCastTo( mce, tyDst, curr ) );
         gSz -= n;
         gOff += n;
      }
   }

   /* Outputs: memory that we write or modify. */
   if (d->mFx == Ifx_Write || d->mFx == Ifx_Modify) {
      offset = 0;
      toDo   = d->mSize;
      /* chew off 32-bit chunks */
      while (toDo >= 4) {
         do_shadow_STle( mce, d->mAddr, d->mSize - toDo,
                         NULL, /* original data */
                         mkPCastTo( mce, Ity_I32, curr ) );
         toDo -= 4;
      }
      /* chew off 16-bit chunks */
      while (toDo >= 2) {
         do_shadow_STle( mce, d->mAddr, d->mSize - toDo,
                         NULL, /* original data */
                         mkPCastTo( mce, Ity_I16, curr ) );
         toDo -= 2;
      }
      tl_assert(toDo == 0); /* also need to handle 1-byte excess */
   }

}


/*------------------------------------------------------------*/
/*--- Memcheck main                                        ---*/
/*------------------------------------------------------------*/

static Bool isBogusAtom ( IRAtom* at )
{
   ULong n = 0;
   IRConst* con;
   tl_assert(isIRAtom(at));
   if (at->tag == Iex_RdTmp)
      return False;
   tl_assert(at->tag == Iex_Const);
   con = at->Iex.Const.con;
   switch (con->tag) {
      case Ico_U8:  n = (ULong)con->Ico.U8; break;
      case Ico_U16: n = (ULong)con->Ico.U16; break;
      case Ico_U32: n = (ULong)con->Ico.U32; break;
      case Ico_U64: n = (ULong)con->Ico.U64; break;
      default: ppIRExpr(at); tl_assert(0);
   }
   /* VG_(printf)("%llx\n", n); */
   return (n == 0xFEFEFEFF
           || n == 0x80808080
           || n == 0x1010101
           || n == 0x1010100);
}
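
/* Editor's note: these look like the word-at-a-time magic constants
   used by optimised strlen/strchr-style code (0x80808080,
   0xFEFEFEFF), which deliberately reads partially-defined words;
   presumably that is why literals containing them mark a block as
   "bogus". */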

__attribute__((unused))
static Bool checkForBogusLiterals ( /*FLAT*/ IRStmt* st )
{
   Int     i;
   IRExpr* e;
   switch (st->tag) {
      case Ist_WrTmp:
         e = st->Ist.WrTmp.data;
         switch (e->tag) {
            case Iex_Get:
            case Iex_RdTmp:
               return False;
            case Iex_Unop:
               return isBogusAtom(e->Iex.Unop.arg);
            case Iex_Binop:
               return isBogusAtom(e->Iex.Binop.arg1)
                      || isBogusAtom(e->Iex.Binop.arg2);
            case Iex_Mux0X:
               return isBogusAtom(e->Iex.Mux0X.cond)
                      || isBogusAtom(e->Iex.Mux0X.expr0)
                      || isBogusAtom(e->Iex.Mux0X.exprX);
            case Iex_Load:
               return isBogusAtom(e->Iex.Load.addr);
            case Iex_CCall:
               for (i = 0; e->Iex.CCall.args[i]; i++)
                  if (isBogusAtom(e->Iex.CCall.args[i]))
                     return True;
               return False;
            default:
               goto unhandled;
         }
      case Ist_Put:
         return isBogusAtom(st->Ist.Put.data);
      case Ist_Store:
         return isBogusAtom(st->Ist.Store.addr)
                || isBogusAtom(st->Ist.Store.data);
      case Ist_Exit:
         return isBogusAtom(st->Ist.Exit.guard);
      default:
      unhandled:
         ppIRStmt(st);
         VG_(tool_panic)("checkForBogusLiterals");
   }
}

IRSB* mc_instrument ( void* closureV,
                      IRSB* bb_in, VexGuestLayout* layout,
                      VexGuestExtents* vge,
                      IRType gWordTy, IRType hWordTy )
{
   Bool verboze = False; //True;

   /* Bool hasBogusLiterals = False; */

   Int i, j, first_stmt;
   IRStmt* st;
   MCEnv mce;

   /* Set up BB */
   IRSB* bb     = emptyIRSB();
   bb->tyenv    = deepCopyIRTypeEnv(bb_in->tyenv);
   bb->next     = deepCopyIRExpr(bb_in->next);
   bb->jumpkind = bb_in->jumpkind;

   /* Set up the running environment.  Only .bb is modified as we go
      along. */
   mce.bb             = bb;
   mce.layout         = layout;
   mce.n_originalTmps = bb->tyenv->types_used;
   mce.hWordTy        = hWordTy;
   mce.tmpMap         = LibVEX_Alloc(mce.n_originalTmps * sizeof(IRTemp));
   for (i = 0; i < mce.n_originalTmps; i++)
      mce.tmpMap[i] = IRTemp_INVALID;

   /* Iterate over the stmts. */

   for (i = 0; i < bb_in->stmts_used; i++) {
      st = bb_in->stmts[i];
      if (!st) continue;

      tl_assert(isFlatIRStmt(st));

      /*
      if (!hasBogusLiterals) {
         hasBogusLiterals = checkForBogusLiterals(st);
         if (hasBogusLiterals) {
            VG_(printf)("bogus: ");
            ppIRStmt(st);
            VG_(printf)("\n");
         }
      }
      */
      first_stmt = bb->stmts_used;

      if (verboze) {
         ppIRStmt(st);
         VG_(printf)("\n\n");
      }

      switch (st->tag) {

         case Ist_WrTmp:
            assign( bb, findShadowTmp(&mce, st->Ist.WrTmp.tmp),
                        expr2vbits( &mce, st->Ist.WrTmp.data) );
            break;

         case Ist_Put:
            do_shadow_PUT( &mce,
                           st->Ist.Put.offset,
                           st->Ist.Put.data,
                           NULL /* shadow atom */ );
            break;

         case Ist_PutI:
            do_shadow_PUTI( &mce,
                            st->Ist.PutI.descr,
                            st->Ist.PutI.ix,
                            st->Ist.PutI.bias,
                            st->Ist.PutI.data );
            break;

         case Ist_Store:
            do_shadow_STle( &mce, st->Ist.Store.addr, 0/* addr bias */,
                                  st->Ist.Store.data,
                                  NULL /* shadow data */ );
            break;

         case Ist_Exit:
            /* if (!hasBogusLiterals) */
               complainIfUndefined( &mce, st->Ist.Exit.guard );
            break;

         case Ist_Dirty:
            do_shadow_Dirty( &mce, st->Ist.Dirty.details );
            break;

         case Ist_IMark:
         case Ist_NoOp:
            break;

         default:
            VG_(printf)("\n");
            ppIRStmt(st);
            VG_(printf)("\n");
            VG_(tool_panic)("memcheck: unhandled IRStmt");

      } /* switch (st->tag) */

      if (verboze) {
         for (j = first_stmt; j < bb->stmts_used; j++) {
            VG_(printf)("   ");
            ppIRStmt(bb->stmts[j]);
            VG_(printf)("\n");
         }
         VG_(printf)("\n");
      }

      addStmtToIRSB(bb, st);

   }

   /* Now we need to complain if the jump target is undefined. */
   first_stmt = bb->stmts_used;

   if (verboze) {
      VG_(printf)("bb->next = ");
      ppIRExpr(bb->next);
      VG_(printf)("\n\n");
   }

   complainIfUndefined( &mce, bb->next );

   if (verboze) {
      for (j = first_stmt; j < bb->stmts_used; j++) {
         VG_(printf)("   ");
         ppIRStmt(bb->stmts[j]);
         VG_(printf)("\n");
      }
      VG_(printf)("\n");
   }

   return bb;
}
#endif /* UNUSED */

/*--------------------------------------------------------------------*/
/*--- end                                              test_main.c ---*/
/*--------------------------------------------------------------------*/