1
2/*--------------------------------------------------------------------*/
3/*--- Instrument IR to perform memory checking operations.         ---*/
4/*---                                               mc_translate.c ---*/
5/*--------------------------------------------------------------------*/
6
7/*
8   This file is part of MemCheck, a heavyweight Valgrind tool for
9   detecting memory errors.
10
11   Copyright (C) 2000-2011 Julian Seward
12      jseward@acm.org
13
14   This program is free software; you can redistribute it and/or
15   modify it under the terms of the GNU General Public License as
16   published by the Free Software Foundation; either version 2 of the
17   License, or (at your option) any later version.
18
19   This program is distributed in the hope that it will be useful, but
20   WITHOUT ANY WARRANTY; without even the implied warranty of
21   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
22   General Public License for more details.
23
24   You should have received a copy of the GNU General Public License
25   along with this program; if not, write to the Free Software
26   Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA
27   02111-1307, USA.
28
29   The GNU General Public License is contained in the file COPYING.
30*/
31
32#include "pub_tool_basics.h"
33#include "pub_tool_hashtable.h"     // For mc_include.h
34#include "pub_tool_libcassert.h"
35#include "pub_tool_libcprint.h"
36#include "pub_tool_tooliface.h"
37#include "pub_tool_machine.h"     // VG_(fnptr_to_fnentry)
38#include "pub_tool_xarray.h"
39#include "pub_tool_mallocfree.h"
40#include "pub_tool_libcbase.h"
41
42#include "mc_include.h"
43
44
45/* FIXMEs JRS 2011-June-16.
46
47   Check the interpretation for vector narrowing and widening ops,
48   particularly the saturating ones.  I suspect they are either overly
49   pessimistic and/or wrong.
50*/
51
52/* This file implements the Memcheck instrumentation, and in
53   particular contains the core of its undefined value detection
54   machinery.  For a comprehensive background of the terminology,
55   algorithms and rationale used herein, read:
56
57     Using Valgrind to detect undefined value errors with
58     bit-precision
59
60     Julian Seward and Nicholas Nethercote
61
62     2005 USENIX Annual Technical Conference (General Track),
63     Anaheim, CA, USA, April 10-15, 2005.
64
65   ----
66
67   Here is as good a place as any to record exactly when V bits are and
68   should be checked, why, and what function is responsible.
69
70
71   Memcheck complains when an undefined value is used:
72
73   1. In the condition of a conditional branch.  Because it could cause
74      incorrect control flow, and thus cause incorrect externally-visible
75      behaviour.  [mc_translate.c:complainIfUndefined]
76
77   2. As an argument to a system call, or as the value that specifies
78      the system call number.  Because it could cause an incorrect
79      externally-visible side effect.  [mc_translate.c:mc_pre_reg_read]
80
81   3. As the address in a load or store.  Because it could cause an
82      incorrect value to be used later, which could cause externally-visible
83      behaviour (eg. via incorrect control flow or an incorrect system call
84      argument)  [complainIfUndefined]
85
86   4. As the target address of a branch.  Because it could cause incorrect
87      control flow.  [complainIfUndefined]
88
89   5. As an argument to setenv, unsetenv, or putenv.  Because it could put
90      an incorrect value into the external environment.
91      [mc_replace_strmem.c:VG_WRAP_FUNCTION_ZU(*, *env)]
92
93   6. As the index in a GETI or PUTI operation.  I'm not sure why... (njn).
94      [complainIfUndefined]
95
96   7. As an argument to the VALGRIND_CHECK_MEM_IS_DEFINED and
97      VALGRIND_CHECK_VALUE_IS_DEFINED client requests.  Because the user
98      requested it.  [in memcheck.h]
99
100
101   Memcheck also complains, but should not, when an undefined value is used:
102
103   8. As the shift value in certain SIMD shift operations (but not in the
104      standard integer shift operations).  This inconsistency is due to
105      historical reasons.)  [complainIfUndefined]
106
107
108   Memcheck does not complain, but should, when an undefined value is used:
109
110   9. As an input to a client request.  Because the client request may
111      affect the visible behaviour -- see bug #144362 for an example
112      involving the malloc replacements in vg_replace_malloc.c and
113      VALGRIND_NON_SIMD_CALL* requests, where an uninitialised argument
114      isn't identified.  That bug report also has some info on how to solve
115      the problem.  [valgrind.h:VALGRIND_DO_CLIENT_REQUEST]
116
117
118   In practice, 1 and 2 account for the vast majority of cases.
119*/
120
121/*------------------------------------------------------------*/
122/*--- Forward decls                                        ---*/
123/*------------------------------------------------------------*/
124
125struct _MCEnv;
126
127static IRType  shadowTypeV ( IRType ty );
128static IRExpr* expr2vbits ( struct _MCEnv* mce, IRExpr* e );
129static IRTemp  findShadowTmpB ( struct _MCEnv* mce, IRTemp orig );
130
131static IRExpr *i128_const_zero(void);
132
133/*------------------------------------------------------------*/
134/*--- Memcheck running state, and tmp management.          ---*/
135/*------------------------------------------------------------*/
136
137/* Carries info about a particular tmp.  The tmp's number is not
138   recorded, as this is implied by (equal to) its index in the tmpMap
139   in MCEnv.  The tmp's type is also not recorded, as this is present
140   in MCEnv.sb->tyenv.
141
142   When .kind is Orig, .shadowV and .shadowB may give the identities
143   of the temps currently holding the associated definedness (shadowV)
144   and origin (shadowB) values, or these may be IRTemp_INVALID if code
145   to compute such values has not yet been emitted.
146
147   When .kind is VSh or BSh then the tmp is holds a V- or B- value,
148   and so .shadowV and .shadowB must be IRTemp_INVALID, since it is
149   illogical for a shadow tmp itself to be shadowed.
150*/
151typedef
152   enum { Orig=1, VSh=2, BSh=3 }
153   TempKind;
154
155typedef
156   struct {
157      TempKind kind;
158      IRTemp   shadowV;
159      IRTemp   shadowB;
160   }
161   TempMapEnt;
162
163
164/* Carries around state during memcheck instrumentation. */
165typedef
166   struct _MCEnv {
167      /* MODIFIED: the superblock being constructed.  IRStmts are
168         added. */
169      IRSB* sb;
170      Bool  trace;
171
172      /* MODIFIED: a table [0 .. #temps_in_sb-1] which gives the
173         current kind and possibly shadow temps for each temp in the
174         IRSB being constructed.  Note that it does not contain the
175         type of each tmp.  If you want to know the type, look at the
176         relevant entry in sb->tyenv.  It follows that at all times
177         during the instrumentation process, the valid indices for
178         tmpMap and sb->tyenv are identical, being 0 .. N-1 where N is
179         total number of Orig, V- and B- temps allocated so far.
180
181         The reason for this strange split (types in one place, all
182         other info in another) is that we need the types to be
183         attached to sb so as to make it possible to do
184         "typeOfIRExpr(mce->bb->tyenv, ...)" at various places in the
185         instrumentation process. */
186      XArray* /* of TempMapEnt */ tmpMap;
187
188      /* MODIFIED: indicates whether "bogus" literals have so far been
189         found.  Starts off False, and may change to True. */
190      Bool    bogusLiterals;
191
192      /* READONLY: the guest layout.  This indicates which parts of
193         the guest state should be regarded as 'always defined'. */
194      VexGuestLayout* layout;
195
196      /* READONLY: the host word type.  Needed for constructing
197         arguments of type 'HWord' to be passed to helper functions.
198         Ity_I32 or Ity_I64 only. */
199      IRType hWordTy;
200   }
201   MCEnv;
202
203/* SHADOW TMP MANAGEMENT.  Shadow tmps are allocated lazily (on
204   demand), as they are encountered.  This is for two reasons.
205
206   (1) (less important reason): Many original tmps are unused due to
207   initial IR optimisation, and we do not want to spaces in tables
208   tracking them.
209
210   Shadow IRTemps are therefore allocated on demand.  mce.tmpMap is a
211   table indexed [0 .. n_types-1], which gives the current shadow for
212   each original tmp, or INVALID_IRTEMP if none is so far assigned.
213   It is necessary to support making multiple assignments to a shadow
214   -- specifically, after testing a shadow for definedness, it needs
215   to be made defined.  But IR's SSA property disallows this.
216
217   (2) (more important reason): Therefore, when a shadow needs to get
218   a new value, a new temporary is created, the value is assigned to
219   that, and the tmpMap is updated to reflect the new binding.
220
221   A corollary is that if the tmpMap maps a given tmp to
222   IRTemp_INVALID and we are hoping to read that shadow tmp, it means
223   there's a read-before-write error in the original tmps.  The IR
224   sanity checker should catch all such anomalies, however.
225*/
226
227/* Create a new IRTemp of type 'ty' and kind 'kind', and add it to
228   both the table in mce->sb and to our auxiliary mapping.  Note that
229   newTemp may cause mce->tmpMap to resize, hence previous results
230   from VG_(indexXA)(mce->tmpMap) are invalidated. */
231static IRTemp newTemp ( MCEnv* mce, IRType ty, TempKind kind )
232{
233   Word       newIx;
234   TempMapEnt ent;
235   IRTemp     tmp = newIRTemp(mce->sb->tyenv, ty);
236   ent.kind    = kind;
237   ent.shadowV = IRTemp_INVALID;
238   ent.shadowB = IRTemp_INVALID;
239   newIx = VG_(addToXA)( mce->tmpMap, &ent );
240   tl_assert(newIx == (Word)tmp);
241   return tmp;
242}
243
244
245/* Find the tmp currently shadowing the given original tmp.  If none
246   so far exists, allocate one.  */
247static IRTemp findShadowTmpV ( MCEnv* mce, IRTemp orig )
248{
249   TempMapEnt* ent;
250   /* VG_(indexXA) range-checks 'orig', hence no need to check
251      here. */
252   ent = (TempMapEnt*)VG_(indexXA)( mce->tmpMap, (Word)orig );
253   tl_assert(ent->kind == Orig);
254   if (ent->shadowV == IRTemp_INVALID) {
255      IRTemp tmpV
256        = newTemp( mce, shadowTypeV(mce->sb->tyenv->types[orig]), VSh );
257      /* newTemp may cause mce->tmpMap to resize, hence previous results
258         from VG_(indexXA) are invalid. */
259      ent = (TempMapEnt*)VG_(indexXA)( mce->tmpMap, (Word)orig );
260      tl_assert(ent->kind == Orig);
261      tl_assert(ent->shadowV == IRTemp_INVALID);
262      ent->shadowV = tmpV;
263   }
264   return ent->shadowV;
265}
266
267/* Allocate a new shadow for the given original tmp.  This means any
268   previous shadow is abandoned.  This is needed because it is
269   necessary to give a new value to a shadow once it has been tested
270   for undefinedness, but unfortunately IR's SSA property disallows
271   this.  Instead we must abandon the old shadow, allocate a new one
272   and use that instead.
273
274   This is the same as findShadowTmpV, except we don't bother to see
275   if a shadow temp already existed -- we simply allocate a new one
276   regardless. */
277static void newShadowTmpV ( MCEnv* mce, IRTemp orig )
278{
279   TempMapEnt* ent;
280   /* VG_(indexXA) range-checks 'orig', hence no need to check
281      here. */
282   ent = (TempMapEnt*)VG_(indexXA)( mce->tmpMap, (Word)orig );
283   tl_assert(ent->kind == Orig);
284   if (1) {
285      IRTemp tmpV
286        = newTemp( mce, shadowTypeV(mce->sb->tyenv->types[orig]), VSh );
287      /* newTemp may cause mce->tmpMap to resize, hence previous results
288         from VG_(indexXA) are invalid. */
289      ent = (TempMapEnt*)VG_(indexXA)( mce->tmpMap, (Word)orig );
290      tl_assert(ent->kind == Orig);
291      ent->shadowV = tmpV;
292   }
293}
294
295
296/*------------------------------------------------------------*/
297/*--- IRAtoms -- a subset of IRExprs                       ---*/
298/*------------------------------------------------------------*/
299
300/* An atom is either an IRExpr_Const or an IRExpr_Tmp, as defined by
301   isIRAtom() in libvex_ir.h.  Because this instrumenter expects flat
302   input, most of this code deals in atoms.  Usefully, a value atom
303   always has a V-value which is also an atom: constants are shadowed
304   by constants, and temps are shadowed by the corresponding shadow
305   temporary. */
306
307typedef  IRExpr  IRAtom;
308
309/* (used for sanity checks only): is this an atom which looks
310   like it's from original code? */
311static Bool isOriginalAtom ( MCEnv* mce, IRAtom* a1 )
312{
313   if (a1->tag == Iex_Const)
314      return True;
315   if (a1->tag == Iex_RdTmp) {
316      TempMapEnt* ent = VG_(indexXA)( mce->tmpMap, a1->Iex.RdTmp.tmp );
317      return ent->kind == Orig;
318   }
319   return False;
320}
321
322/* (used for sanity checks only): is this an atom which looks
323   like it's from shadow code? */
324static Bool isShadowAtom ( MCEnv* mce, IRAtom* a1 )
325{
326   if (a1->tag == Iex_Const)
327      return True;
328   if (a1->tag == Iex_RdTmp) {
329      TempMapEnt* ent = VG_(indexXA)( mce->tmpMap, a1->Iex.RdTmp.tmp );
330      return ent->kind == VSh || ent->kind == BSh;
331   }
332   return False;
333}
334
335/* (used for sanity checks only): check that both args are atoms and
336   are identically-kinded. */
337static Bool sameKindedAtoms ( IRAtom* a1, IRAtom* a2 )
338{
339   if (a1->tag == Iex_RdTmp && a2->tag == Iex_RdTmp)
340      return True;
341   if (a1->tag == Iex_Const && a2->tag == Iex_Const)
342      return True;
343   return False;
344}
345
346
347/*------------------------------------------------------------*/
348/*--- Type management                                      ---*/
349/*------------------------------------------------------------*/
350
351/* Shadow state is always accessed using integer types.  This returns
352   an integer type with the same size (as per sizeofIRType) as the
353   given type.  The only valid shadow types are Bit, I8, I16, I32,
354   I64, I128, V128. */
355
356static IRType shadowTypeV ( IRType ty )
357{
358   switch (ty) {
359      case Ity_I1:
360      case Ity_I8:
361      case Ity_I16:
362      case Ity_I32:
363      case Ity_I64:
364      case Ity_I128: return ty;
365      case Ity_F32:  return Ity_I32;
366      case Ity_F64:  return Ity_I64;
367      case Ity_F128: return Ity_I128;
368      case Ity_V128: return Ity_V128;
369      default: ppIRType(ty);
370               VG_(tool_panic)("memcheck:shadowTypeV");
371   }
372}
373
374/* Produce a 'defined' value of the given shadow type.  Should only be
375   supplied shadow types (Bit/I8/I16/I32/UI64). */
376static IRExpr* definedOfType ( IRType ty ) {
377   switch (ty) {
378      case Ity_I1:   return IRExpr_Const(IRConst_U1(False));
379      case Ity_I8:   return IRExpr_Const(IRConst_U8(0));
380      case Ity_I16:  return IRExpr_Const(IRConst_U16(0));
381      case Ity_I32:  return IRExpr_Const(IRConst_U32(0));
382      case Ity_I64:  return IRExpr_Const(IRConst_U64(0));
383      case Ity_I128: return i128_const_zero();
384      case Ity_V128: return IRExpr_Const(IRConst_V128(0x0000));
385      default:       VG_(tool_panic)("memcheck:definedOfType");
386   }
387}
388
389
390/*------------------------------------------------------------*/
391/*--- Constructing IR fragments                            ---*/
392/*------------------------------------------------------------*/
393
394/* add stmt to a bb */
395static inline void stmt ( HChar cat, MCEnv* mce, IRStmt* st ) {
396   if (mce->trace) {
397      VG_(printf)("  %c: ", cat);
398      ppIRStmt(st);
399      VG_(printf)("\n");
400   }
401   addStmtToIRSB(mce->sb, st);
402}
403
404/* assign value to tmp */
405static inline
406void assign ( HChar cat, MCEnv* mce, IRTemp tmp, IRExpr* expr ) {
407   stmt(cat, mce, IRStmt_WrTmp(tmp,expr));
408}
409
410/* build various kinds of expressions */
411#define triop(_op, _arg1, _arg2, _arg3) \
412                                 IRExpr_Triop((_op),(_arg1),(_arg2),(_arg3))
413#define binop(_op, _arg1, _arg2) IRExpr_Binop((_op),(_arg1),(_arg2))
414#define unop(_op, _arg)          IRExpr_Unop((_op),(_arg))
415#define mkU8(_n)                 IRExpr_Const(IRConst_U8(_n))
416#define mkU16(_n)                IRExpr_Const(IRConst_U16(_n))
417#define mkU32(_n)                IRExpr_Const(IRConst_U32(_n))
418#define mkU64(_n)                IRExpr_Const(IRConst_U64(_n))
419#define mkV128(_n)               IRExpr_Const(IRConst_V128(_n))
420#define mkexpr(_tmp)             IRExpr_RdTmp((_tmp))
421
422/* Bind the given expression to a new temporary, and return the
423   temporary.  This effectively converts an arbitrary expression into
424   an atom.
425
426   'ty' is the type of 'e' and hence the type that the new temporary
427   needs to be.  But passing it in is redundant, since we can deduce
428   the type merely by inspecting 'e'.  So at least use that fact to
429   assert that the two types agree. */
430static IRAtom* assignNew ( HChar cat, MCEnv* mce, IRType ty, IRExpr* e )
431{
432   TempKind k;
433   IRTemp   t;
434   IRType   tyE = typeOfIRExpr(mce->sb->tyenv, e);
435   tl_assert(tyE == ty); /* so 'ty' is redundant (!) */
436   switch (cat) {
437      case 'V': k = VSh;  break;
438      case 'B': k = BSh;  break;
439      case 'C': k = Orig; break;
440                /* happens when we are making up new "orig"
441                   expressions, for IRCAS handling */
442      default: tl_assert(0);
443   }
444   t = newTemp(mce, ty, k);
445   assign(cat, mce, t, e);
446   return mkexpr(t);
447}
448
449
450/*------------------------------------------------------------*/
451/*--- Helper functions for 128-bit ops                     ---*/
452/*------------------------------------------------------------*/
453static IRExpr *i128_const_zero(void)
454{
455  return binop(Iop_64HLto128, IRExpr_Const(IRConst_U64(0)),
456               IRExpr_Const(IRConst_U64(0)));
457}
458
459/* There are no 128-bit loads and/or stores. So we do not need to worry
460   about that in expr2vbits_Load */
461
462/*------------------------------------------------------------*/
463/*--- Constructing definedness primitive ops               ---*/
464/*------------------------------------------------------------*/
465
466/* --------- Defined-if-either-defined --------- */
467
468static IRAtom* mkDifD8 ( MCEnv* mce, IRAtom* a1, IRAtom* a2 ) {
469   tl_assert(isShadowAtom(mce,a1));
470   tl_assert(isShadowAtom(mce,a2));
471   return assignNew('V', mce, Ity_I8, binop(Iop_And8, a1, a2));
472}
473
474static IRAtom* mkDifD16 ( MCEnv* mce, IRAtom* a1, IRAtom* a2 ) {
475   tl_assert(isShadowAtom(mce,a1));
476   tl_assert(isShadowAtom(mce,a2));
477   return assignNew('V', mce, Ity_I16, binop(Iop_And16, a1, a2));
478}
479
480static IRAtom* mkDifD32 ( MCEnv* mce, IRAtom* a1, IRAtom* a2 ) {
481   tl_assert(isShadowAtom(mce,a1));
482   tl_assert(isShadowAtom(mce,a2));
483   return assignNew('V', mce, Ity_I32, binop(Iop_And32, a1, a2));
484}
485
486static IRAtom* mkDifD64 ( MCEnv* mce, IRAtom* a1, IRAtom* a2 ) {
487   tl_assert(isShadowAtom(mce,a1));
488   tl_assert(isShadowAtom(mce,a2));
489   return assignNew('V', mce, Ity_I64, binop(Iop_And64, a1, a2));
490}
491
492static IRAtom* mkDifDV128 ( MCEnv* mce, IRAtom* a1, IRAtom* a2 ) {
493   tl_assert(isShadowAtom(mce,a1));
494   tl_assert(isShadowAtom(mce,a2));
495   return assignNew('V', mce, Ity_V128, binop(Iop_AndV128, a1, a2));
496}
497
498/* --------- Undefined-if-either-undefined --------- */
499
500static IRAtom* mkUifU8 ( MCEnv* mce, IRAtom* a1, IRAtom* a2 ) {
501   tl_assert(isShadowAtom(mce,a1));
502   tl_assert(isShadowAtom(mce,a2));
503   return assignNew('V', mce, Ity_I8, binop(Iop_Or8, a1, a2));
504}
505
506static IRAtom* mkUifU16 ( MCEnv* mce, IRAtom* a1, IRAtom* a2 ) {
507   tl_assert(isShadowAtom(mce,a1));
508   tl_assert(isShadowAtom(mce,a2));
509   return assignNew('V', mce, Ity_I16, binop(Iop_Or16, a1, a2));
510}
511
512static IRAtom* mkUifU32 ( MCEnv* mce, IRAtom* a1, IRAtom* a2 ) {
513   tl_assert(isShadowAtom(mce,a1));
514   tl_assert(isShadowAtom(mce,a2));
515   return assignNew('V', mce, Ity_I32, binop(Iop_Or32, a1, a2));
516}
517
518static IRAtom* mkUifU64 ( MCEnv* mce, IRAtom* a1, IRAtom* a2 ) {
519   tl_assert(isShadowAtom(mce,a1));
520   tl_assert(isShadowAtom(mce,a2));
521   return assignNew('V', mce, Ity_I64, binop(Iop_Or64, a1, a2));
522}
523
524static IRAtom* mkUifU128 ( MCEnv* mce, IRAtom* a1, IRAtom* a2 ) {
525   IRAtom *tmp1, *tmp2, *tmp3, *tmp4, *tmp5, *tmp6;
526   tl_assert(isShadowAtom(mce,a1));
527   tl_assert(isShadowAtom(mce,a2));
528   tmp1 = assignNew('V', mce, Ity_I64, unop(Iop_128to64, a1));
529   tmp2 = assignNew('V', mce, Ity_I64, unop(Iop_128HIto64, a1));
530   tmp3 = assignNew('V', mce, Ity_I64, unop(Iop_128to64, a2));
531   tmp4 = assignNew('V', mce, Ity_I64, unop(Iop_128HIto64, a2));
532   tmp5 = assignNew('V', mce, Ity_I64, binop(Iop_Or64, tmp1, tmp3));
533   tmp6 = assignNew('V', mce, Ity_I64, binop(Iop_Or64, tmp2, tmp4));
534
535   return assignNew('V', mce, Ity_I128, binop(Iop_64HLto128, tmp6, tmp5));
536}
537
538static IRAtom* mkUifUV128 ( MCEnv* mce, IRAtom* a1, IRAtom* a2 ) {
539   tl_assert(isShadowAtom(mce,a1));
540   tl_assert(isShadowAtom(mce,a2));
541   return assignNew('V', mce, Ity_V128, binop(Iop_OrV128, a1, a2));
542}
543
544static IRAtom* mkUifU ( MCEnv* mce, IRType vty, IRAtom* a1, IRAtom* a2 ) {
545   switch (vty) {
546      case Ity_I8:   return mkUifU8(mce, a1, a2);
547      case Ity_I16:  return mkUifU16(mce, a1, a2);
548      case Ity_I32:  return mkUifU32(mce, a1, a2);
549      case Ity_I64:  return mkUifU64(mce, a1, a2);
550      case Ity_I128: return mkUifU128(mce, a1, a2);
551      case Ity_V128: return mkUifUV128(mce, a1, a2);
552      default:
553         VG_(printf)("\n"); ppIRType(vty); VG_(printf)("\n");
554         VG_(tool_panic)("memcheck:mkUifU");
555   }
556}
557
558/* --------- The Left-family of operations. --------- */
559
560static IRAtom* mkLeft8 ( MCEnv* mce, IRAtom* a1 ) {
561   tl_assert(isShadowAtom(mce,a1));
562   return assignNew('V', mce, Ity_I8, unop(Iop_Left8, a1));
563}
564
565static IRAtom* mkLeft16 ( MCEnv* mce, IRAtom* a1 ) {
566   tl_assert(isShadowAtom(mce,a1));
567   return assignNew('V', mce, Ity_I16, unop(Iop_Left16, a1));
568}
569
570static IRAtom* mkLeft32 ( MCEnv* mce, IRAtom* a1 ) {
571   tl_assert(isShadowAtom(mce,a1));
572   return assignNew('V', mce, Ity_I32, unop(Iop_Left32, a1));
573}
574
575static IRAtom* mkLeft64 ( MCEnv* mce, IRAtom* a1 ) {
576   tl_assert(isShadowAtom(mce,a1));
577   return assignNew('V', mce, Ity_I64, unop(Iop_Left64, a1));
578}
579
580/* --------- 'Improvement' functions for AND/OR. --------- */
581
582/* ImproveAND(data, vbits) = data OR vbits.  Defined (0) data 0s give
583   defined (0); all other -> undefined (1).
584*/
585static IRAtom* mkImproveAND8 ( MCEnv* mce, IRAtom* data, IRAtom* vbits )
586{
587   tl_assert(isOriginalAtom(mce, data));
588   tl_assert(isShadowAtom(mce, vbits));
589   tl_assert(sameKindedAtoms(data, vbits));
590   return assignNew('V', mce, Ity_I8, binop(Iop_Or8, data, vbits));
591}
592
593static IRAtom* mkImproveAND16 ( MCEnv* mce, IRAtom* data, IRAtom* vbits )
594{
595   tl_assert(isOriginalAtom(mce, data));
596   tl_assert(isShadowAtom(mce, vbits));
597   tl_assert(sameKindedAtoms(data, vbits));
598   return assignNew('V', mce, Ity_I16, binop(Iop_Or16, data, vbits));
599}
600
601static IRAtom* mkImproveAND32 ( MCEnv* mce, IRAtom* data, IRAtom* vbits )
602{
603   tl_assert(isOriginalAtom(mce, data));
604   tl_assert(isShadowAtom(mce, vbits));
605   tl_assert(sameKindedAtoms(data, vbits));
606   return assignNew('V', mce, Ity_I32, binop(Iop_Or32, data, vbits));
607}
608
609static IRAtom* mkImproveAND64 ( MCEnv* mce, IRAtom* data, IRAtom* vbits )
610{
611   tl_assert(isOriginalAtom(mce, data));
612   tl_assert(isShadowAtom(mce, vbits));
613   tl_assert(sameKindedAtoms(data, vbits));
614   return assignNew('V', mce, Ity_I64, binop(Iop_Or64, data, vbits));
615}
616
617static IRAtom* mkImproveANDV128 ( MCEnv* mce, IRAtom* data, IRAtom* vbits )
618{
619   tl_assert(isOriginalAtom(mce, data));
620   tl_assert(isShadowAtom(mce, vbits));
621   tl_assert(sameKindedAtoms(data, vbits));
622   return assignNew('V', mce, Ity_V128, binop(Iop_OrV128, data, vbits));
623}
624
625/* ImproveOR(data, vbits) = ~data OR vbits.  Defined (0) data 1s give
626   defined (0); all other -> undefined (1).
627*/
628static IRAtom* mkImproveOR8 ( MCEnv* mce, IRAtom* data, IRAtom* vbits )
629{
630   tl_assert(isOriginalAtom(mce, data));
631   tl_assert(isShadowAtom(mce, vbits));
632   tl_assert(sameKindedAtoms(data, vbits));
633   return assignNew(
634             'V', mce, Ity_I8,
635             binop(Iop_Or8,
636                   assignNew('V', mce, Ity_I8, unop(Iop_Not8, data)),
637                   vbits) );
638}
639
640static IRAtom* mkImproveOR16 ( MCEnv* mce, IRAtom* data, IRAtom* vbits )
641{
642   tl_assert(isOriginalAtom(mce, data));
643   tl_assert(isShadowAtom(mce, vbits));
644   tl_assert(sameKindedAtoms(data, vbits));
645   return assignNew(
646             'V', mce, Ity_I16,
647             binop(Iop_Or16,
648                   assignNew('V', mce, Ity_I16, unop(Iop_Not16, data)),
649                   vbits) );
650}
651
652static IRAtom* mkImproveOR32 ( MCEnv* mce, IRAtom* data, IRAtom* vbits )
653{
654   tl_assert(isOriginalAtom(mce, data));
655   tl_assert(isShadowAtom(mce, vbits));
656   tl_assert(sameKindedAtoms(data, vbits));
657   return assignNew(
658             'V', mce, Ity_I32,
659             binop(Iop_Or32,
660                   assignNew('V', mce, Ity_I32, unop(Iop_Not32, data)),
661                   vbits) );
662}
663
664static IRAtom* mkImproveOR64 ( MCEnv* mce, IRAtom* data, IRAtom* vbits )
665{
666   tl_assert(isOriginalAtom(mce, data));
667   tl_assert(isShadowAtom(mce, vbits));
668   tl_assert(sameKindedAtoms(data, vbits));
669   return assignNew(
670             'V', mce, Ity_I64,
671             binop(Iop_Or64,
672                   assignNew('V', mce, Ity_I64, unop(Iop_Not64, data)),
673                   vbits) );
674}
675
676static IRAtom* mkImproveORV128 ( MCEnv* mce, IRAtom* data, IRAtom* vbits )
677{
678   tl_assert(isOriginalAtom(mce, data));
679   tl_assert(isShadowAtom(mce, vbits));
680   tl_assert(sameKindedAtoms(data, vbits));
681   return assignNew(
682             'V', mce, Ity_V128,
683             binop(Iop_OrV128,
684                   assignNew('V', mce, Ity_V128, unop(Iop_NotV128, data)),
685                   vbits) );
686}
687
688/* --------- Pessimising casts. --------- */
689
690/* The function returns an expression of type DST_TY. If any of the VBITS
691   is undefined (value == 1) the resulting expression has all bits set to
692   1. Otherwise, all bits are 0. */
693
694static IRAtom* mkPCastTo( MCEnv* mce, IRType dst_ty, IRAtom* vbits )
695{
696   IRType  src_ty;
697   IRAtom* tmp1;
698   /* Note, dst_ty is a shadow type, not an original type. */
699   /* First of all, collapse vbits down to a single bit. */
700   tl_assert(isShadowAtom(mce,vbits));
701   src_ty = typeOfIRExpr(mce->sb->tyenv, vbits);
702
703   /* Fast-track some common cases */
704   if (src_ty == Ity_I32 && dst_ty == Ity_I32)
705      return assignNew('V', mce, Ity_I32, unop(Iop_CmpwNEZ32, vbits));
706
707   if (src_ty == Ity_I64 && dst_ty == Ity_I64)
708      return assignNew('V', mce, Ity_I64, unop(Iop_CmpwNEZ64, vbits));
709
710   if (src_ty == Ity_I32 && dst_ty == Ity_I64) {
711      IRAtom* tmp = assignNew('V', mce, Ity_I32, unop(Iop_CmpwNEZ32, vbits));
712      return assignNew('V', mce, Ity_I64, binop(Iop_32HLto64, tmp, tmp));
713   }
714
715   /* Else do it the slow way .. */
716   tmp1   = NULL;
717   switch (src_ty) {
718      case Ity_I1:
719         tmp1 = vbits;
720         break;
721      case Ity_I8:
722         tmp1 = assignNew('V', mce, Ity_I1, unop(Iop_CmpNEZ8, vbits));
723         break;
724      case Ity_I16:
725         tmp1 = assignNew('V', mce, Ity_I1, unop(Iop_CmpNEZ16, vbits));
726         break;
727      case Ity_I32:
728         tmp1 = assignNew('V', mce, Ity_I1, unop(Iop_CmpNEZ32, vbits));
729         break;
730      case Ity_I64:
731         tmp1 = assignNew('V', mce, Ity_I1, unop(Iop_CmpNEZ64, vbits));
732         break;
733      case Ity_I128: {
734         /* Gah.  Chop it in half, OR the halves together, and compare
735            that with zero. */
736         IRAtom* tmp2 = assignNew('V', mce, Ity_I64, unop(Iop_128HIto64, vbits));
737         IRAtom* tmp3 = assignNew('V', mce, Ity_I64, unop(Iop_128to64, vbits));
738         IRAtom* tmp4 = assignNew('V', mce, Ity_I64, binop(Iop_Or64, tmp2, tmp3));
739         tmp1         = assignNew('V', mce, Ity_I1,
740                                       unop(Iop_CmpNEZ64, tmp4));
741         break;
742      }
743      default:
744         ppIRType(src_ty);
745         VG_(tool_panic)("mkPCastTo(1)");
746   }
747   tl_assert(tmp1);
748   /* Now widen up to the dst type. */
749   switch (dst_ty) {
750      case Ity_I1:
751         return tmp1;
752      case Ity_I8:
753         return assignNew('V', mce, Ity_I8, unop(Iop_1Sto8, tmp1));
754      case Ity_I16:
755         return assignNew('V', mce, Ity_I16, unop(Iop_1Sto16, tmp1));
756      case Ity_I32:
757         return assignNew('V', mce, Ity_I32, unop(Iop_1Sto32, tmp1));
758      case Ity_I64:
759         return assignNew('V', mce, Ity_I64, unop(Iop_1Sto64, tmp1));
760      case Ity_V128:
761         tmp1 = assignNew('V', mce, Ity_I64,  unop(Iop_1Sto64, tmp1));
762         tmp1 = assignNew('V', mce, Ity_V128, binop(Iop_64HLtoV128, tmp1, tmp1));
763         return tmp1;
764      case Ity_I128:
765         tmp1 = assignNew('V', mce, Ity_I64,  unop(Iop_1Sto64, tmp1));
766         tmp1 = assignNew('V', mce, Ity_I128, binop(Iop_64HLto128, tmp1, tmp1));
767         return tmp1;
768      default:
769         ppIRType(dst_ty);
770         VG_(tool_panic)("mkPCastTo(2)");
771   }
772}
773
774/* --------- Accurate interpretation of CmpEQ/CmpNE. --------- */
775/*
776   Normally, we can do CmpEQ/CmpNE by doing UifU on the arguments, and
777   PCasting to Ity_U1.  However, sometimes it is necessary to be more
778   accurate.  The insight is that the result is defined if two
779   corresponding bits can be found, one from each argument, so that
780   both bits are defined but are different -- that makes EQ say "No"
781   and NE say "Yes".  Hence, we compute an improvement term and DifD
782   it onto the "normal" (UifU) result.
783
784   The result is:
785
786   PCastTo<1> (
787      -- naive version
788      PCastTo<sz>( UifU<sz>(vxx, vyy) )
789
790      `DifD<sz>`
791
792      -- improvement term
793      PCastTo<sz>( PCast<sz>( CmpEQ<sz> ( vec, 1...1 ) ) )
794   )
795
796   where
797     vec contains 0 (defined) bits where the corresponding arg bits
798     are defined but different, and 1 bits otherwise.
799
800     vec = Or<sz>( vxx,   // 0 iff bit defined
801                   vyy,   // 0 iff bit defined
802                   Not<sz>(Xor<sz>( xx, yy )) // 0 iff bits different
803                 )
804
805     If any bit of vec is 0, the result is defined and so the
806     improvement term should produce 0...0, else it should produce
807     1...1.
808
809     Hence require for the improvement term:
810
811        if vec == 1...1 then 1...1 else 0...0
812     ->
813        PCast<sz>( CmpEQ<sz> ( vec, 1...1 ) )
814
815   This was extensively re-analysed and checked on 6 July 05.
816*/
817static IRAtom* expensiveCmpEQorNE ( MCEnv*  mce,
818                                    IRType  ty,
819                                    IRAtom* vxx, IRAtom* vyy,
820                                    IRAtom* xx,  IRAtom* yy )
821{
822   IRAtom *naive, *vec, *improvement_term;
823   IRAtom *improved, *final_cast, *top;
824   IROp   opDIFD, opUIFU, opXOR, opNOT, opCMP, opOR;
825
826   tl_assert(isShadowAtom(mce,vxx));
827   tl_assert(isShadowAtom(mce,vyy));
828   tl_assert(isOriginalAtom(mce,xx));
829   tl_assert(isOriginalAtom(mce,yy));
830   tl_assert(sameKindedAtoms(vxx,xx));
831   tl_assert(sameKindedAtoms(vyy,yy));
832
833   switch (ty) {
834      case Ity_I32:
835         opOR   = Iop_Or32;
836         opDIFD = Iop_And32;
837         opUIFU = Iop_Or32;
838         opNOT  = Iop_Not32;
839         opXOR  = Iop_Xor32;
840         opCMP  = Iop_CmpEQ32;
841         top    = mkU32(0xFFFFFFFF);
842         break;
843      case Ity_I64:
844         opOR   = Iop_Or64;
845         opDIFD = Iop_And64;
846         opUIFU = Iop_Or64;
847         opNOT  = Iop_Not64;
848         opXOR  = Iop_Xor64;
849         opCMP  = Iop_CmpEQ64;
850         top    = mkU64(0xFFFFFFFFFFFFFFFFULL);
851         break;
852      default:
853         VG_(tool_panic)("expensiveCmpEQorNE");
854   }
855
856   naive
857      = mkPCastTo(mce,ty,
858                  assignNew('V', mce, ty, binop(opUIFU, vxx, vyy)));
859
860   vec
861      = assignNew(
862           'V', mce,ty,
863           binop( opOR,
864                  assignNew('V', mce,ty, binop(opOR, vxx, vyy)),
865                  assignNew(
866                     'V', mce,ty,
867                     unop( opNOT,
868                           assignNew('V', mce,ty, binop(opXOR, xx, yy))))));
869
870   improvement_term
871      = mkPCastTo( mce,ty,
872                   assignNew('V', mce,Ity_I1, binop(opCMP, vec, top)));
873
874   improved
875      = assignNew( 'V', mce,ty, binop(opDIFD, naive, improvement_term) );
876
877   final_cast
878      = mkPCastTo( mce, Ity_I1, improved );
879
880   return final_cast;
881}
882
883
884/* --------- Semi-accurate interpretation of CmpORD. --------- */
885
886/* CmpORD32{S,U} does PowerPC-style 3-way comparisons:
887
888      CmpORD32S(x,y) = 1<<3   if  x <s y
889                     = 1<<2   if  x >s y
890                     = 1<<1   if  x == y
891
892   and similarly the unsigned variant.  The default interpretation is:
893
894      CmpORD32{S,U}#(x,y,x#,y#) = PCast(x# `UifU` y#)
895                                  & (7<<1)
896
897   The "& (7<<1)" reflects the fact that all result bits except 3,2,1
898   are zero and therefore defined (viz, zero).
899
900   Also deal with a special case better:
901
902      CmpORD32S(x,0)
903
904   Here, bit 3 (LT) of the result is a copy of the top bit of x and
905   will be defined even if the rest of x isn't.  In which case we do:
906
907      CmpORD32S#(x,x#,0,{impliedly 0}#)
908         = PCast(x#) & (3<<1)      -- standard interp for GT#,EQ#
909           | (x# >>u 31) << 3      -- LT# = x#[31]
910
911   Analogous handling for CmpORD64{S,U}.
912*/
913static Bool isZeroU32 ( IRAtom* e )
914{
915   return
916      toBool( e->tag == Iex_Const
917              && e->Iex.Const.con->tag == Ico_U32
918              && e->Iex.Const.con->Ico.U32 == 0 );
919}
920
921static Bool isZeroU64 ( IRAtom* e )
922{
923   return
924      toBool( e->tag == Iex_Const
925              && e->Iex.Const.con->tag == Ico_U64
926              && e->Iex.Const.con->Ico.U64 == 0 );
927}
928
929static IRAtom* doCmpORD ( MCEnv*  mce,
930                          IROp    cmp_op,
931                          IRAtom* xxhash, IRAtom* yyhash,
932                          IRAtom* xx,     IRAtom* yy )
933{
934   Bool   m64    = cmp_op == Iop_CmpORD64S || cmp_op == Iop_CmpORD64U;
935   Bool   syned  = cmp_op == Iop_CmpORD64S || cmp_op == Iop_CmpORD32S;
936   IROp   opOR   = m64 ? Iop_Or64  : Iop_Or32;
937   IROp   opAND  = m64 ? Iop_And64 : Iop_And32;
938   IROp   opSHL  = m64 ? Iop_Shl64 : Iop_Shl32;
939   IROp   opSHR  = m64 ? Iop_Shr64 : Iop_Shr32;
940   IRType ty     = m64 ? Ity_I64   : Ity_I32;
941   Int    width  = m64 ? 64        : 32;
942
943   Bool (*isZero)(IRAtom*) = m64 ? isZeroU64 : isZeroU32;
944
945   IRAtom* threeLeft1 = NULL;
946   IRAtom* sevenLeft1 = NULL;
947
948   tl_assert(isShadowAtom(mce,xxhash));
949   tl_assert(isShadowAtom(mce,yyhash));
950   tl_assert(isOriginalAtom(mce,xx));
951   tl_assert(isOriginalAtom(mce,yy));
952   tl_assert(sameKindedAtoms(xxhash,xx));
953   tl_assert(sameKindedAtoms(yyhash,yy));
954   tl_assert(cmp_op == Iop_CmpORD32S || cmp_op == Iop_CmpORD32U
955             || cmp_op == Iop_CmpORD64S || cmp_op == Iop_CmpORD64U);
956
957   if (0) {
958      ppIROp(cmp_op); VG_(printf)(" ");
959      ppIRExpr(xx); VG_(printf)(" "); ppIRExpr( yy ); VG_(printf)("\n");
960   }
961
962   if (syned && isZero(yy)) {
963      /* fancy interpretation */
964      /* if yy is zero, then it must be fully defined (zero#). */
965      tl_assert(isZero(yyhash));
966      threeLeft1 = m64 ? mkU64(3<<1) : mkU32(3<<1);
967      return
968         binop(
969            opOR,
970            assignNew(
971               'V', mce,ty,
972               binop(
973                  opAND,
974                  mkPCastTo(mce,ty, xxhash),
975                  threeLeft1
976               )),
977            assignNew(
978               'V', mce,ty,
979               binop(
980                  opSHL,
981                  assignNew(
982                     'V', mce,ty,
983                     binop(opSHR, xxhash, mkU8(width-1))),
984                  mkU8(3)
985               ))
986	 );
987   } else {
988      /* standard interpretation */
989      sevenLeft1 = m64 ? mkU64(7<<1) : mkU32(7<<1);
990      return
991         binop(
992            opAND,
993            mkPCastTo( mce,ty,
994                       mkUifU(mce,ty, xxhash,yyhash)),
995            sevenLeft1
996         );
997   }
998}
999
1000
1001/*------------------------------------------------------------*/
1002/*--- Emit a test and complaint if something is undefined. ---*/
1003/*------------------------------------------------------------*/
1004
1005static IRAtom* schemeE ( MCEnv* mce, IRExpr* e ); /* fwds */
1006
1007
1008/* Set the annotations on a dirty helper to indicate that the stack
1009   pointer and instruction pointers might be read.  This is the
1010   behaviour of all 'emit-a-complaint' style functions we might
1011   call. */
1012
1013static void setHelperAnns ( MCEnv* mce, IRDirty* di ) {
1014   di->nFxState = 2;
1015   di->fxState[0].fx     = Ifx_Read;
1016   di->fxState[0].offset = mce->layout->offset_SP;
1017   di->fxState[0].size   = mce->layout->sizeof_SP;
1018   di->fxState[1].fx     = Ifx_Read;
1019   di->fxState[1].offset = mce->layout->offset_IP;
1020   di->fxState[1].size   = mce->layout->sizeof_IP;
1021}
1022
1023
1024/* Check the supplied **original** atom for undefinedness, and emit a
1025   complaint if so.  Once that happens, mark it as defined.  This is
1026   possible because the atom is either a tmp or literal.  If it's a
1027   tmp, it will be shadowed by a tmp, and so we can set the shadow to
1028   be defined.  In fact as mentioned above, we will have to allocate a
1029   new tmp to carry the new 'defined' shadow value, and update the
1030   original->tmp mapping accordingly; we cannot simply assign a new
1031   value to an existing shadow tmp as this breaks SSAness -- resulting
1032   in the post-instrumentation sanity checker spluttering in disapproval.
1033*/
1034static void complainIfUndefined ( MCEnv* mce, IRAtom* atom )
1035{
1036   IRAtom*  vatom;
1037   IRType   ty;
1038   Int      sz;
1039   IRDirty* di;
1040   IRAtom*  cond;
1041   IRAtom*  origin;
1042   void*    fn;
1043   HChar*   nm;
1044   IRExpr** args;
1045   Int      nargs;
1046
1047   // Don't do V bit tests if we're not reporting undefined value errors.
1048   if (MC_(clo_mc_level) == 1)
1049      return;
1050
1051   /* Since the original expression is atomic, there's no duplicated
1052      work generated by making multiple V-expressions for it.  So we
1053      don't really care about the possibility that someone else may
1054      also create a V-interpretion for it. */
1055   tl_assert(isOriginalAtom(mce, atom));
1056   vatom = expr2vbits( mce, atom );
1057   tl_assert(isShadowAtom(mce, vatom));
1058   tl_assert(sameKindedAtoms(atom, vatom));
1059
1060   ty = typeOfIRExpr(mce->sb->tyenv, vatom);
1061
1062   /* sz is only used for constructing the error message */
1063   sz = ty==Ity_I1 ? 0 : sizeofIRType(ty);
1064
1065   cond = mkPCastTo( mce, Ity_I1, vatom );
1066   /* cond will be 0 if all defined, and 1 if any not defined. */
1067
1068   /* Get the origin info for the value we are about to check.  At
1069      least, if we are doing origin tracking.  If not, use a dummy
1070      zero origin. */
1071   if (MC_(clo_mc_level) == 3) {
1072      origin = schemeE( mce, atom );
1073      if (mce->hWordTy == Ity_I64) {
1074         origin = assignNew( 'B', mce, Ity_I64, unop(Iop_32Uto64, origin) );
1075      }
1076   } else {
1077      origin = NULL;
1078   }
1079
1080   fn    = NULL;
1081   nm    = NULL;
1082   args  = NULL;
1083   nargs = -1;
1084
1085   switch (sz) {
1086      case 0:
1087         if (origin) {
1088            fn    = &MC_(helperc_value_check0_fail_w_o);
1089            nm    = "MC_(helperc_value_check0_fail_w_o)";
1090            args  = mkIRExprVec_1(origin);
1091            nargs = 1;
1092         } else {
1093            fn    = &MC_(helperc_value_check0_fail_no_o);
1094            nm    = "MC_(helperc_value_check0_fail_no_o)";
1095            args  = mkIRExprVec_0();
1096            nargs = 0;
1097         }
1098         break;
1099      case 1:
1100         if (origin) {
1101            fn    = &MC_(helperc_value_check1_fail_w_o);
1102            nm    = "MC_(helperc_value_check1_fail_w_o)";
1103            args  = mkIRExprVec_1(origin);
1104            nargs = 1;
1105         } else {
1106            fn    = &MC_(helperc_value_check1_fail_no_o);
1107            nm    = "MC_(helperc_value_check1_fail_no_o)";
1108            args  = mkIRExprVec_0();
1109            nargs = 0;
1110         }
1111         break;
1112      case 4:
1113         if (origin) {
1114            fn    = &MC_(helperc_value_check4_fail_w_o);
1115            nm    = "MC_(helperc_value_check4_fail_w_o)";
1116            args  = mkIRExprVec_1(origin);
1117            nargs = 1;
1118         } else {
1119            fn    = &MC_(helperc_value_check4_fail_no_o);
1120            nm    = "MC_(helperc_value_check4_fail_no_o)";
1121            args  = mkIRExprVec_0();
1122            nargs = 0;
1123         }
1124         break;
1125      case 8:
1126         if (origin) {
1127            fn    = &MC_(helperc_value_check8_fail_w_o);
1128            nm    = "MC_(helperc_value_check8_fail_w_o)";
1129            args  = mkIRExprVec_1(origin);
1130            nargs = 1;
1131         } else {
1132            fn    = &MC_(helperc_value_check8_fail_no_o);
1133            nm    = "MC_(helperc_value_check8_fail_no_o)";
1134            args  = mkIRExprVec_0();
1135            nargs = 0;
1136         }
1137         break;
1138      case 2:
1139      case 16:
1140         if (origin) {
1141            fn    = &MC_(helperc_value_checkN_fail_w_o);
1142            nm    = "MC_(helperc_value_checkN_fail_w_o)";
1143            args  = mkIRExprVec_2( mkIRExpr_HWord( sz ), origin);
1144            nargs = 2;
1145         } else {
1146            fn    = &MC_(helperc_value_checkN_fail_no_o);
1147            nm    = "MC_(helperc_value_checkN_fail_no_o)";
1148            args  = mkIRExprVec_1( mkIRExpr_HWord( sz ) );
1149            nargs = 1;
1150         }
1151         break;
1152      default:
1153         VG_(tool_panic)("unexpected szB");
1154   }
1155
1156   tl_assert(fn);
1157   tl_assert(nm);
1158   tl_assert(args);
1159   tl_assert(nargs >= 0 && nargs <= 2);
1160   tl_assert( (MC_(clo_mc_level) == 3 && origin != NULL)
1161              || (MC_(clo_mc_level) == 2 && origin == NULL) );
1162
1163   di = unsafeIRDirty_0_N( nargs/*regparms*/, nm,
1164                           VG_(fnptr_to_fnentry)( fn ), args );
1165   di->guard = cond;
1166   setHelperAnns( mce, di );
1167   stmt( 'V', mce, IRStmt_Dirty(di));
1168
1169   /* Set the shadow tmp to be defined.  First, update the
1170      orig->shadow tmp mapping to reflect the fact that this shadow is
1171      getting a new value. */
1172   tl_assert(isIRAtom(vatom));
1173   /* sameKindedAtoms ... */
1174   if (vatom->tag == Iex_RdTmp) {
1175      tl_assert(atom->tag == Iex_RdTmp);
1176      newShadowTmpV(mce, atom->Iex.RdTmp.tmp);
1177      assign('V', mce, findShadowTmpV(mce, atom->Iex.RdTmp.tmp),
1178                       definedOfType(ty));
1179   }
1180}
1181
1182
1183/*------------------------------------------------------------*/
1184/*--- Shadowing PUTs/GETs, and indexed variants thereof    ---*/
1185/*------------------------------------------------------------*/
1186
1187/* Examine the always-defined sections declared in layout to see if
1188   the (offset,size) section is within one.  Note, is is an error to
1189   partially fall into such a region: (offset,size) should either be
1190   completely in such a region or completely not-in such a region.
1191*/
1192static Bool isAlwaysDefd ( MCEnv* mce, Int offset, Int size )
1193{
1194   Int minoffD, maxoffD, i;
1195   Int minoff = offset;
1196   Int maxoff = minoff + size - 1;
1197   tl_assert((minoff & ~0xFFFF) == 0);
1198   tl_assert((maxoff & ~0xFFFF) == 0);
1199
1200   for (i = 0; i < mce->layout->n_alwaysDefd; i++) {
1201      minoffD = mce->layout->alwaysDefd[i].offset;
1202      maxoffD = minoffD + mce->layout->alwaysDefd[i].size - 1;
1203      tl_assert((minoffD & ~0xFFFF) == 0);
1204      tl_assert((maxoffD & ~0xFFFF) == 0);
1205
1206      if (maxoff < minoffD || maxoffD < minoff)
1207         continue; /* no overlap */
1208      if (minoff >= minoffD && maxoff <= maxoffD)
1209         return True; /* completely contained in an always-defd section */
1210
1211      VG_(tool_panic)("memcheck:isAlwaysDefd:partial overlap");
1212   }
1213   return False; /* could not find any containing section */
1214}
1215
1216
1217/* Generate into bb suitable actions to shadow this Put.  If the state
1218   slice is marked 'always defined', do nothing.  Otherwise, write the
1219   supplied V bits to the shadow state.  We can pass in either an
1220   original atom or a V-atom, but not both.  In the former case the
1221   relevant V-bits are then generated from the original.
1222*/
1223static
1224void do_shadow_PUT ( MCEnv* mce,  Int offset,
1225                     IRAtom* atom, IRAtom* vatom )
1226{
1227   IRType ty;
1228
1229   // Don't do shadow PUTs if we're not doing undefined value checking.
1230   // Their absence lets Vex's optimiser remove all the shadow computation
1231   // that they depend on, which includes GETs of the shadow registers.
1232   if (MC_(clo_mc_level) == 1)
1233      return;
1234
1235   if (atom) {
1236      tl_assert(!vatom);
1237      tl_assert(isOriginalAtom(mce, atom));
1238      vatom = expr2vbits( mce, atom );
1239   } else {
1240      tl_assert(vatom);
1241      tl_assert(isShadowAtom(mce, vatom));
1242   }
1243
1244   ty = typeOfIRExpr(mce->sb->tyenv, vatom);
1245   tl_assert(ty != Ity_I1);
1246   tl_assert(ty != Ity_I128);
1247   if (isAlwaysDefd(mce, offset, sizeofIRType(ty))) {
1248      /* later: no ... */
1249      /* emit code to emit a complaint if any of the vbits are 1. */
1250      /* complainIfUndefined(mce, atom); */
1251   } else {
1252      /* Do a plain shadow Put. */
1253      stmt( 'V', mce, IRStmt_Put( offset + mce->layout->total_sizeB, vatom ) );
1254   }
1255}
1256
1257
1258/* Return an expression which contains the V bits corresponding to the
1259   given GETI (passed in in pieces).
1260*/
1261static
1262void do_shadow_PUTI ( MCEnv* mce,
1263                      IRRegArray* descr,
1264                      IRAtom* ix, Int bias, IRAtom* atom )
1265{
1266   IRAtom* vatom;
1267   IRType  ty, tyS;
1268   Int     arrSize;;
1269
1270   // Don't do shadow PUTIs if we're not doing undefined value checking.
1271   // Their absence lets Vex's optimiser remove all the shadow computation
1272   // that they depend on, which includes GETIs of the shadow registers.
1273   if (MC_(clo_mc_level) == 1)
1274      return;
1275
1276   tl_assert(isOriginalAtom(mce,atom));
1277   vatom = expr2vbits( mce, atom );
1278   tl_assert(sameKindedAtoms(atom, vatom));
1279   ty   = descr->elemTy;
1280   tyS  = shadowTypeV(ty);
1281   arrSize = descr->nElems * sizeofIRType(ty);
1282   tl_assert(ty != Ity_I1);
1283   tl_assert(isOriginalAtom(mce,ix));
1284   complainIfUndefined(mce,ix);
1285   if (isAlwaysDefd(mce, descr->base, arrSize)) {
1286      /* later: no ... */
1287      /* emit code to emit a complaint if any of the vbits are 1. */
1288      /* complainIfUndefined(mce, atom); */
1289   } else {
1290      /* Do a cloned version of the Put that refers to the shadow
1291         area. */
1292      IRRegArray* new_descr
1293         = mkIRRegArray( descr->base + mce->layout->total_sizeB,
1294                         tyS, descr->nElems);
1295      stmt( 'V', mce, IRStmt_PutI( new_descr, ix, bias, vatom ));
1296   }
1297}
1298
1299
1300/* Return an expression which contains the V bits corresponding to the
1301   given GET (passed in in pieces).
1302*/
1303static
1304IRExpr* shadow_GET ( MCEnv* mce, Int offset, IRType ty )
1305{
1306   IRType tyS = shadowTypeV(ty);
1307   tl_assert(ty != Ity_I1);
1308   tl_assert(ty != Ity_I128);
1309   if (isAlwaysDefd(mce, offset, sizeofIRType(ty))) {
1310      /* Always defined, return all zeroes of the relevant type */
1311      return definedOfType(tyS);
1312   } else {
1313      /* return a cloned version of the Get that refers to the shadow
1314         area. */
1315      /* FIXME: this isn't an atom! */
1316      return IRExpr_Get( offset + mce->layout->total_sizeB, tyS );
1317   }
1318}
1319
1320
1321/* Return an expression which contains the V bits corresponding to the
1322   given GETI (passed in in pieces).
1323*/
1324static
1325IRExpr* shadow_GETI ( MCEnv* mce,
1326                      IRRegArray* descr, IRAtom* ix, Int bias )
1327{
1328   IRType ty   = descr->elemTy;
1329   IRType tyS  = shadowTypeV(ty);
1330   Int arrSize = descr->nElems * sizeofIRType(ty);
1331   tl_assert(ty != Ity_I1);
1332   tl_assert(isOriginalAtom(mce,ix));
1333   complainIfUndefined(mce,ix);
1334   if (isAlwaysDefd(mce, descr->base, arrSize)) {
1335      /* Always defined, return all zeroes of the relevant type */
1336      return definedOfType(tyS);
1337   } else {
1338      /* return a cloned version of the Get that refers to the shadow
1339         area. */
1340      IRRegArray* new_descr
1341         = mkIRRegArray( descr->base + mce->layout->total_sizeB,
1342                         tyS, descr->nElems);
1343      return IRExpr_GetI( new_descr, ix, bias );
1344   }
1345}
1346
1347
1348/*------------------------------------------------------------*/
1349/*--- Generating approximations for unknown operations,    ---*/
1350/*--- using lazy-propagate semantics                       ---*/
1351/*------------------------------------------------------------*/
1352
1353/* Lazy propagation of undefinedness from two values, resulting in the
1354   specified shadow type.
1355*/
1356static
1357IRAtom* mkLazy2 ( MCEnv* mce, IRType finalVty, IRAtom* va1, IRAtom* va2 )
1358{
1359   IRAtom* at;
1360   IRType t1 = typeOfIRExpr(mce->sb->tyenv, va1);
1361   IRType t2 = typeOfIRExpr(mce->sb->tyenv, va2);
1362   tl_assert(isShadowAtom(mce,va1));
1363   tl_assert(isShadowAtom(mce,va2));
1364
1365   /* The general case is inefficient because PCast is an expensive
1366      operation.  Here are some special cases which use PCast only
1367      once rather than twice. */
1368
1369   /* I64 x I64 -> I64 */
1370   if (t1 == Ity_I64 && t2 == Ity_I64 && finalVty == Ity_I64) {
1371      if (0) VG_(printf)("mkLazy2: I64 x I64 -> I64\n");
1372      at = mkUifU(mce, Ity_I64, va1, va2);
1373      at = mkPCastTo(mce, Ity_I64, at);
1374      return at;
1375   }
1376
1377   /* I64 x I64 -> I32 */
1378   if (t1 == Ity_I64 && t2 == Ity_I64 && finalVty == Ity_I32) {
1379      if (0) VG_(printf)("mkLazy2: I64 x I64 -> I32\n");
1380      at = mkUifU(mce, Ity_I64, va1, va2);
1381      at = mkPCastTo(mce, Ity_I32, at);
1382      return at;
1383   }
1384
1385   if (0) {
1386      VG_(printf)("mkLazy2 ");
1387      ppIRType(t1);
1388      VG_(printf)("_");
1389      ppIRType(t2);
1390      VG_(printf)("_");
1391      ppIRType(finalVty);
1392      VG_(printf)("\n");
1393   }
1394
1395   /* General case: force everything via 32-bit intermediaries. */
1396   at = mkPCastTo(mce, Ity_I32, va1);
1397   at = mkUifU(mce, Ity_I32, at, mkPCastTo(mce, Ity_I32, va2));
1398   at = mkPCastTo(mce, finalVty, at);
1399   return at;
1400}
1401
1402
1403/* 3-arg version of the above. */
1404static
1405IRAtom* mkLazy3 ( MCEnv* mce, IRType finalVty,
1406                  IRAtom* va1, IRAtom* va2, IRAtom* va3 )
1407{
1408   IRAtom* at;
1409   IRType t1 = typeOfIRExpr(mce->sb->tyenv, va1);
1410   IRType t2 = typeOfIRExpr(mce->sb->tyenv, va2);
1411   IRType t3 = typeOfIRExpr(mce->sb->tyenv, va3);
1412   tl_assert(isShadowAtom(mce,va1));
1413   tl_assert(isShadowAtom(mce,va2));
1414   tl_assert(isShadowAtom(mce,va3));
1415
1416   /* The general case is inefficient because PCast is an expensive
1417      operation.  Here are some special cases which use PCast only
1418      twice rather than three times. */
1419
1420   /* I32 x I64 x I64 -> I64 */
1421   /* Standard FP idiom: rm x FParg1 x FParg2 -> FPresult */
1422   if (t1 == Ity_I32 && t2 == Ity_I64 && t3 == Ity_I64
1423       && finalVty == Ity_I64) {
1424      if (0) VG_(printf)("mkLazy3: I32 x I64 x I64 -> I64\n");
1425      /* Widen 1st arg to I64.  Since 1st arg is typically a rounding
1426         mode indication which is fully defined, this should get
1427         folded out later. */
1428      at = mkPCastTo(mce, Ity_I64, va1);
1429      /* Now fold in 2nd and 3rd args. */
1430      at = mkUifU(mce, Ity_I64, at, va2);
1431      at = mkUifU(mce, Ity_I64, at, va3);
1432      /* and PCast once again. */
1433      at = mkPCastTo(mce, Ity_I64, at);
1434      return at;
1435   }
1436
1437   /* I32 x I64 x I64 -> I32 */
1438   if (t1 == Ity_I32 && t2 == Ity_I64 && t3 == Ity_I64
1439       && finalVty == Ity_I32) {
1440      if (0) VG_(printf)("mkLazy3: I32 x I64 x I64 -> I32\n");
1441      at = mkPCastTo(mce, Ity_I64, va1);
1442      at = mkUifU(mce, Ity_I64, at, va2);
1443      at = mkUifU(mce, Ity_I64, at, va3);
1444      at = mkPCastTo(mce, Ity_I32, at);
1445      return at;
1446   }
1447
1448   /* I32 x I32 x I32 -> I32 */
1449   /* 32-bit FP idiom, as (eg) happens on ARM */
1450   if (t1 == Ity_I32 && t2 == Ity_I32 && t3 == Ity_I32
1451       && finalVty == Ity_I32) {
1452      if (0) VG_(printf)("mkLazy3: I32 x I32 x I32 -> I32\n");
1453      at = va1;
1454      at = mkUifU(mce, Ity_I32, at, va2);
1455      at = mkUifU(mce, Ity_I32, at, va3);
1456      at = mkPCastTo(mce, Ity_I32, at);
1457      return at;
1458   }
1459
1460   /* I32 x I128 x I128 -> I128 */
1461   /* Standard FP idiom: rm x FParg1 x FParg2 -> FPresult */
1462   if (t1 == Ity_I32 && t2 == Ity_I128 && t3 == Ity_I128
1463       && finalVty == Ity_I128) {
1464      if (0) VG_(printf)("mkLazy3: I32 x I128 x I128 -> I128\n");
1465      /* Widen 1st arg to I128.  Since 1st arg is typically a rounding
1466         mode indication which is fully defined, this should get
1467         folded out later. */
1468      at = mkPCastTo(mce, Ity_I128, va1);
1469      /* Now fold in 2nd and 3rd args. */
1470      at = mkUifU(mce, Ity_I128, at, va2);
1471      at = mkUifU(mce, Ity_I128, at, va3);
1472      /* and PCast once again. */
1473      at = mkPCastTo(mce, Ity_I128, at);
1474      return at;
1475   }
1476   if (1) {
1477      VG_(printf)("mkLazy3: ");
1478      ppIRType(t1);
1479      VG_(printf)(" x ");
1480      ppIRType(t2);
1481      VG_(printf)(" x ");
1482      ppIRType(t3);
1483      VG_(printf)(" -> ");
1484      ppIRType(finalVty);
1485      VG_(printf)("\n");
1486   }
1487
1488   tl_assert(0);
1489   /* General case: force everything via 32-bit intermediaries. */
1490   /*
1491   at = mkPCastTo(mce, Ity_I32, va1);
1492   at = mkUifU(mce, Ity_I32, at, mkPCastTo(mce, Ity_I32, va2));
1493   at = mkUifU(mce, Ity_I32, at, mkPCastTo(mce, Ity_I32, va3));
1494   at = mkPCastTo(mce, finalVty, at);
1495   return at;
1496   */
1497}
1498
1499
1500/* 4-arg version of the above. */
1501static
1502IRAtom* mkLazy4 ( MCEnv* mce, IRType finalVty,
1503                  IRAtom* va1, IRAtom* va2, IRAtom* va3, IRAtom* va4 )
1504{
1505   IRAtom* at;
1506   IRType t1 = typeOfIRExpr(mce->sb->tyenv, va1);
1507   IRType t2 = typeOfIRExpr(mce->sb->tyenv, va2);
1508   IRType t3 = typeOfIRExpr(mce->sb->tyenv, va3);
1509   IRType t4 = typeOfIRExpr(mce->sb->tyenv, va4);
1510   tl_assert(isShadowAtom(mce,va1));
1511   tl_assert(isShadowAtom(mce,va2));
1512   tl_assert(isShadowAtom(mce,va3));
1513   tl_assert(isShadowAtom(mce,va4));
1514
1515   /* The general case is inefficient because PCast is an expensive
1516      operation.  Here are some special cases which use PCast only
1517      twice rather than three times. */
1518
1519   /* I32 x I64 x I64 x I64 -> I64 */
1520   /* Standard FP idiom: rm x FParg1 x FParg2 x FParg3 -> FPresult */
1521   if (t1 == Ity_I32 && t2 == Ity_I64 && t3 == Ity_I64 && t4 == Ity_I64
1522       && finalVty == Ity_I64) {
1523      if (0) VG_(printf)("mkLazy4: I32 x I64 x I64 x I64 -> I64\n");
1524      /* Widen 1st arg to I64.  Since 1st arg is typically a rounding
1525         mode indication which is fully defined, this should get
1526         folded out later. */
1527      at = mkPCastTo(mce, Ity_I64, va1);
1528      /* Now fold in 2nd, 3rd, 4th args. */
1529      at = mkUifU(mce, Ity_I64, at, va2);
1530      at = mkUifU(mce, Ity_I64, at, va3);
1531      at = mkUifU(mce, Ity_I64, at, va4);
1532      /* and PCast once again. */
1533      at = mkPCastTo(mce, Ity_I64, at);
1534      return at;
1535   }
1536   /* I32 x I32 x I32 x I32 -> I32 */
1537   /* Standard FP idiom: rm x FParg1 x FParg2 x FParg3 -> FPresult */
1538   if (t1 == Ity_I32 && t2 == Ity_I32 && t3 == Ity_I32 && t4 == Ity_I32
1539       && finalVty == Ity_I32) {
1540      if (0) VG_(printf)("mkLazy4: I32 x I32 x I32 x I32 -> I32\n");
1541      at = va1;
1542      /* Now fold in 2nd, 3rd, 4th args. */
1543      at = mkUifU(mce, Ity_I32, at, va2);
1544      at = mkUifU(mce, Ity_I32, at, va3);
1545      at = mkUifU(mce, Ity_I32, at, va4);
1546      at = mkPCastTo(mce, Ity_I32, at);
1547      return at;
1548   }
1549
1550   if (1) {
1551      VG_(printf)("mkLazy4: ");
1552      ppIRType(t1);
1553      VG_(printf)(" x ");
1554      ppIRType(t2);
1555      VG_(printf)(" x ");
1556      ppIRType(t3);
1557      VG_(printf)(" x ");
1558      ppIRType(t4);
1559      VG_(printf)(" -> ");
1560      ppIRType(finalVty);
1561      VG_(printf)("\n");
1562   }
1563
1564   tl_assert(0);
1565}
1566
1567
1568/* Do the lazy propagation game from a null-terminated vector of
1569   atoms.  This is presumably the arguments to a helper call, so the
1570   IRCallee info is also supplied in order that we can know which
1571   arguments should be ignored (via the .mcx_mask field).
1572*/
1573static
1574IRAtom* mkLazyN ( MCEnv* mce,
1575                  IRAtom** exprvec, IRType finalVtype, IRCallee* cee )
1576{
1577   Int     i;
1578   IRAtom* here;
1579   IRAtom* curr;
1580   IRType  mergeTy;
1581   Bool    mergeTy64 = True;
1582
1583   /* Decide on the type of the merge intermediary.  If all relevant
1584      args are I64, then it's I64.  In all other circumstances, use
1585      I32. */
1586   for (i = 0; exprvec[i]; i++) {
1587      tl_assert(i < 32);
1588      tl_assert(isOriginalAtom(mce, exprvec[i]));
1589      if (cee->mcx_mask & (1<<i))
1590         continue;
1591      if (typeOfIRExpr(mce->sb->tyenv, exprvec[i]) != Ity_I64)
1592         mergeTy64 = False;
1593   }
1594
1595   mergeTy = mergeTy64  ? Ity_I64  : Ity_I32;
1596   curr    = definedOfType(mergeTy);
1597
1598   for (i = 0; exprvec[i]; i++) {
1599      tl_assert(i < 32);
1600      tl_assert(isOriginalAtom(mce, exprvec[i]));
1601      /* Only take notice of this arg if the callee's mc-exclusion
1602         mask does not say it is to be excluded. */
1603      if (cee->mcx_mask & (1<<i)) {
1604         /* the arg is to be excluded from definedness checking.  Do
1605            nothing. */
1606         if (0) VG_(printf)("excluding %s(%d)\n", cee->name, i);
1607      } else {
1608         /* calculate the arg's definedness, and pessimistically merge
1609            it in. */
1610         here = mkPCastTo( mce, mergeTy, expr2vbits(mce, exprvec[i]) );
1611         curr = mergeTy64
1612                   ? mkUifU64(mce, here, curr)
1613                   : mkUifU32(mce, here, curr);
1614      }
1615   }
1616   return mkPCastTo(mce, finalVtype, curr );
1617}
1618
1619
1620/*------------------------------------------------------------*/
1621/*--- Generating expensive sequences for exact carry-chain ---*/
1622/*--- propagation in add/sub and related operations.       ---*/
1623/*------------------------------------------------------------*/
1624
1625static
1626IRAtom* expensiveAddSub ( MCEnv*  mce,
1627                          Bool    add,
1628                          IRType  ty,
1629                          IRAtom* qaa, IRAtom* qbb,
1630                          IRAtom* aa,  IRAtom* bb )
1631{
1632   IRAtom *a_min, *b_min, *a_max, *b_max;
1633   IROp   opAND, opOR, opXOR, opNOT, opADD, opSUB;
1634
1635   tl_assert(isShadowAtom(mce,qaa));
1636   tl_assert(isShadowAtom(mce,qbb));
1637   tl_assert(isOriginalAtom(mce,aa));
1638   tl_assert(isOriginalAtom(mce,bb));
1639   tl_assert(sameKindedAtoms(qaa,aa));
1640   tl_assert(sameKindedAtoms(qbb,bb));
1641
1642   switch (ty) {
1643      case Ity_I32:
1644         opAND = Iop_And32;
1645         opOR  = Iop_Or32;
1646         opXOR = Iop_Xor32;
1647         opNOT = Iop_Not32;
1648         opADD = Iop_Add32;
1649         opSUB = Iop_Sub32;
1650         break;
1651      case Ity_I64:
1652         opAND = Iop_And64;
1653         opOR  = Iop_Or64;
1654         opXOR = Iop_Xor64;
1655         opNOT = Iop_Not64;
1656         opADD = Iop_Add64;
1657         opSUB = Iop_Sub64;
1658         break;
1659      default:
1660         VG_(tool_panic)("expensiveAddSub");
1661   }
1662
1663   // a_min = aa & ~qaa
1664   a_min = assignNew('V', mce,ty,
1665                     binop(opAND, aa,
1666                                  assignNew('V', mce,ty, unop(opNOT, qaa))));
1667
1668   // b_min = bb & ~qbb
1669   b_min = assignNew('V', mce,ty,
1670                     binop(opAND, bb,
1671                                  assignNew('V', mce,ty, unop(opNOT, qbb))));
1672
1673   // a_max = aa | qaa
1674   a_max = assignNew('V', mce,ty, binop(opOR, aa, qaa));
1675
1676   // b_max = bb | qbb
1677   b_max = assignNew('V', mce,ty, binop(opOR, bb, qbb));
1678
1679   if (add) {
1680      // result = (qaa | qbb) | ((a_min + b_min) ^ (a_max + b_max))
1681      return
1682      assignNew('V', mce,ty,
1683         binop( opOR,
1684                assignNew('V', mce,ty, binop(opOR, qaa, qbb)),
1685                assignNew('V', mce,ty,
1686                   binop( opXOR,
1687                          assignNew('V', mce,ty, binop(opADD, a_min, b_min)),
1688                          assignNew('V', mce,ty, binop(opADD, a_max, b_max))
1689                   )
1690                )
1691         )
1692      );
1693   } else {
1694      // result = (qaa | qbb) | ((a_min - b_max) ^ (a_max + b_min))
1695      return
1696      assignNew('V', mce,ty,
1697         binop( opOR,
1698                assignNew('V', mce,ty, binop(opOR, qaa, qbb)),
1699                assignNew('V', mce,ty,
1700                   binop( opXOR,
1701                          assignNew('V', mce,ty, binop(opSUB, a_min, b_max)),
1702                          assignNew('V', mce,ty, binop(opSUB, a_max, b_min))
1703                   )
1704                )
1705         )
1706      );
1707   }
1708
1709}
1710
1711
1712/*------------------------------------------------------------*/
1713/*--- Scalar shifts.                                       ---*/
1714/*------------------------------------------------------------*/
1715
1716/* Produce an interpretation for (aa << bb) (or >>s, >>u).  The basic
1717   idea is to shift the definedness bits by the original shift amount.
1718   This introduces 0s ("defined") in new positions for left shifts and
1719   unsigned right shifts, and copies the top definedness bit for
1720   signed right shifts.  So, conveniently, applying the original shift
1721   operator to the definedness bits for the left arg is exactly the
1722   right thing to do:
1723
1724      (qaa << bb)
1725
1726   However if the shift amount is undefined then the whole result
1727   is undefined.  Hence need:
1728
1729      (qaa << bb) `UifU` PCast(qbb)
1730
1731   If the shift amount bb is a literal than qbb will say 'all defined'
1732   and the UifU and PCast will get folded out by post-instrumentation
1733   optimisation.
1734*/
1735static IRAtom* scalarShift ( MCEnv*  mce,
1736                             IRType  ty,
1737                             IROp    original_op,
1738                             IRAtom* qaa, IRAtom* qbb,
1739                             IRAtom* aa,  IRAtom* bb )
1740{
1741   tl_assert(isShadowAtom(mce,qaa));
1742   tl_assert(isShadowAtom(mce,qbb));
1743   tl_assert(isOriginalAtom(mce,aa));
1744   tl_assert(isOriginalAtom(mce,bb));
1745   tl_assert(sameKindedAtoms(qaa,aa));
1746   tl_assert(sameKindedAtoms(qbb,bb));
1747   return
1748      assignNew(
1749         'V', mce, ty,
1750         mkUifU( mce, ty,
1751                 assignNew('V', mce, ty, binop(original_op, qaa, bb)),
1752                 mkPCastTo(mce, ty, qbb)
1753         )
1754   );
1755}
1756
1757
1758/*------------------------------------------------------------*/
1759/*--- Helpers for dealing with vector primops.             ---*/
1760/*------------------------------------------------------------*/
1761
1762/* Vector pessimisation -- pessimise within each lane individually. */
1763
1764static IRAtom* mkPCast8x16 ( MCEnv* mce, IRAtom* at )
1765{
1766   return assignNew('V', mce, Ity_V128, unop(Iop_CmpNEZ8x16, at));
1767}
1768
1769static IRAtom* mkPCast16x8 ( MCEnv* mce, IRAtom* at )
1770{
1771   return assignNew('V', mce, Ity_V128, unop(Iop_CmpNEZ16x8, at));
1772}
1773
1774static IRAtom* mkPCast32x4 ( MCEnv* mce, IRAtom* at )
1775{
1776   return assignNew('V', mce, Ity_V128, unop(Iop_CmpNEZ32x4, at));
1777}
1778
1779static IRAtom* mkPCast64x2 ( MCEnv* mce, IRAtom* at )
1780{
1781   return assignNew('V', mce, Ity_V128, unop(Iop_CmpNEZ64x2, at));
1782}
1783
1784static IRAtom* mkPCast32x2 ( MCEnv* mce, IRAtom* at )
1785{
1786   return assignNew('V', mce, Ity_I64, unop(Iop_CmpNEZ32x2, at));
1787}
1788
1789static IRAtom* mkPCast16x4 ( MCEnv* mce, IRAtom* at )
1790{
1791   return assignNew('V', mce, Ity_I64, unop(Iop_CmpNEZ16x4, at));
1792}
1793
1794static IRAtom* mkPCast8x8 ( MCEnv* mce, IRAtom* at )
1795{
1796   return assignNew('V', mce, Ity_I64, unop(Iop_CmpNEZ8x8, at));
1797}
1798
1799static IRAtom* mkPCast16x2 ( MCEnv* mce, IRAtom* at )
1800{
1801   return assignNew('V', mce, Ity_I32, unop(Iop_CmpNEZ16x2, at));
1802}
1803
1804static IRAtom* mkPCast8x4 ( MCEnv* mce, IRAtom* at )
1805{
1806   return assignNew('V', mce, Ity_I32, unop(Iop_CmpNEZ8x4, at));
1807}
1808
1809
1810/* Here's a simple scheme capable of handling ops derived from SSE1
1811   code and while only generating ops that can be efficiently
1812   implemented in SSE1. */
1813
1814/* All-lanes versions are straightforward:
1815
1816   binary32Fx4(x,y)   ==> PCast32x4(UifUV128(x#,y#))
1817
1818   unary32Fx4(x,y)    ==> PCast32x4(x#)
1819
1820   Lowest-lane-only versions are more complex:
1821
1822   binary32F0x4(x,y)  ==> SetV128lo32(
1823                             x#,
1824                             PCast32(V128to32(UifUV128(x#,y#)))
1825                          )
1826
1827   This is perhaps not so obvious.  In particular, it's faster to
1828   do a V128-bit UifU and then take the bottom 32 bits than the more
1829   obvious scheme of taking the bottom 32 bits of each operand
1830   and doing a 32-bit UifU.  Basically since UifU is fast and
1831   chopping lanes off vector values is slow.
1832
1833   Finally:
1834
1835   unary32F0x4(x)     ==> SetV128lo32(
1836                             x#,
1837                             PCast32(V128to32(x#))
1838                          )
1839
1840   Where:
1841
1842   PCast32(v#)   = 1Sto32(CmpNE32(v#,0))
1843   PCast32x4(v#) = CmpNEZ32x4(v#)
1844*/
1845
1846static
1847IRAtom* binary32Fx4 ( MCEnv* mce, IRAtom* vatomX, IRAtom* vatomY )
1848{
1849   IRAtom* at;
1850   tl_assert(isShadowAtom(mce, vatomX));
1851   tl_assert(isShadowAtom(mce, vatomY));
1852   at = mkUifUV128(mce, vatomX, vatomY);
1853   at = assignNew('V', mce, Ity_V128, mkPCast32x4(mce, at));
1854   return at;
1855}
1856
1857static
1858IRAtom* unary32Fx4 ( MCEnv* mce, IRAtom* vatomX )
1859{
1860   IRAtom* at;
1861   tl_assert(isShadowAtom(mce, vatomX));
1862   at = assignNew('V', mce, Ity_V128, mkPCast32x4(mce, vatomX));
1863   return at;
1864}
1865
1866static
1867IRAtom* binary32F0x4 ( MCEnv* mce, IRAtom* vatomX, IRAtom* vatomY )
1868{
1869   IRAtom* at;
1870   tl_assert(isShadowAtom(mce, vatomX));
1871   tl_assert(isShadowAtom(mce, vatomY));
1872   at = mkUifUV128(mce, vatomX, vatomY);
1873   at = assignNew('V', mce, Ity_I32, unop(Iop_V128to32, at));
1874   at = mkPCastTo(mce, Ity_I32, at);
1875   at = assignNew('V', mce, Ity_V128, binop(Iop_SetV128lo32, vatomX, at));
1876   return at;
1877}
1878
1879static
1880IRAtom* unary32F0x4 ( MCEnv* mce, IRAtom* vatomX )
1881{
1882   IRAtom* at;
1883   tl_assert(isShadowAtom(mce, vatomX));
1884   at = assignNew('V', mce, Ity_I32, unop(Iop_V128to32, vatomX));
1885   at = mkPCastTo(mce, Ity_I32, at);
1886   at = assignNew('V', mce, Ity_V128, binop(Iop_SetV128lo32, vatomX, at));
1887   return at;
1888}
1889
1890/* --- ... and ... 64Fx2 versions of the same ... --- */
1891
1892static
1893IRAtom* binary64Fx2 ( MCEnv* mce, IRAtom* vatomX, IRAtom* vatomY )
1894{
1895   IRAtom* at;
1896   tl_assert(isShadowAtom(mce, vatomX));
1897   tl_assert(isShadowAtom(mce, vatomY));
1898   at = mkUifUV128(mce, vatomX, vatomY);
1899   at = assignNew('V', mce, Ity_V128, mkPCast64x2(mce, at));
1900   return at;
1901}
1902
1903static
1904IRAtom* unary64Fx2 ( MCEnv* mce, IRAtom* vatomX )
1905{
1906   IRAtom* at;
1907   tl_assert(isShadowAtom(mce, vatomX));
1908   at = assignNew('V', mce, Ity_V128, mkPCast64x2(mce, vatomX));
1909   return at;
1910}
1911
1912static
1913IRAtom* binary64F0x2 ( MCEnv* mce, IRAtom* vatomX, IRAtom* vatomY )
1914{
1915   IRAtom* at;
1916   tl_assert(isShadowAtom(mce, vatomX));
1917   tl_assert(isShadowAtom(mce, vatomY));
1918   at = mkUifUV128(mce, vatomX, vatomY);
1919   at = assignNew('V', mce, Ity_I64, unop(Iop_V128to64, at));
1920   at = mkPCastTo(mce, Ity_I64, at);
1921   at = assignNew('V', mce, Ity_V128, binop(Iop_SetV128lo64, vatomX, at));
1922   return at;
1923}
1924
1925static
1926IRAtom* unary64F0x2 ( MCEnv* mce, IRAtom* vatomX )
1927{
1928   IRAtom* at;
1929   tl_assert(isShadowAtom(mce, vatomX));
1930   at = assignNew('V', mce, Ity_I64, unop(Iop_V128to64, vatomX));
1931   at = mkPCastTo(mce, Ity_I64, at);
1932   at = assignNew('V', mce, Ity_V128, binop(Iop_SetV128lo64, vatomX, at));
1933   return at;
1934}
1935
1936/* --- --- ... and ... 32Fx2 versions of the same --- --- */
1937
1938static
1939IRAtom* binary32Fx2 ( MCEnv* mce, IRAtom* vatomX, IRAtom* vatomY )
1940{
1941   IRAtom* at;
1942   tl_assert(isShadowAtom(mce, vatomX));
1943   tl_assert(isShadowAtom(mce, vatomY));
1944   at = mkUifU64(mce, vatomX, vatomY);
1945   at = assignNew('V', mce, Ity_I64, mkPCast32x2(mce, at));
1946   return at;
1947}
1948
1949static
1950IRAtom* unary32Fx2 ( MCEnv* mce, IRAtom* vatomX )
1951{
1952   IRAtom* at;
1953   tl_assert(isShadowAtom(mce, vatomX));
1954   at = assignNew('V', mce, Ity_I64, mkPCast32x2(mce, vatomX));
1955   return at;
1956}
1957
1958/* --- --- Vector saturated narrowing --- --- */
1959
1960/* We used to do something very clever here, but on closer inspection
1961   (2011-Jun-15), and in particular bug #279698, it turns out to be
1962   wrong.  Part of the problem came from the fact that for a long
1963   time, the IR primops to do with saturated narrowing were
1964   underspecified and managed to confuse multiple cases which needed
1965   to be separate: the op names had a signedness qualifier, but in
1966   fact the source and destination signednesses needed to be specified
1967   independently, so the op names really need two independent
1968   signedness specifiers.
1969
1970   As of 2011-Jun-15 (ish) the underspecification was sorted out
1971   properly.  The incorrect instrumentation remained, though.  That
1972   has now (2011-Oct-22) been fixed.
1973
1974   What we now do is simple:
1975
1976   Let the original narrowing op be QNarrowBinXtoYxZ, where Z is a
1977   number of lanes, X is the source lane width and signedness, and Y
1978   is the destination lane width and signedness.  In all cases the
1979   destination lane width is half the source lane width, so the names
1980   have a bit of redundancy, but are at least easy to read.
1981
1982   For example, Iop_QNarrowBin32Sto16Ux8 narrows 8 lanes of signed 32s
1983   to unsigned 16s.
1984
1985   Let Vanilla(OP) be a function that takes OP, one of these
1986   saturating narrowing ops, and produces the same "shaped" narrowing
1987   op which is not saturating, but merely dumps the most significant
1988   bits.  "same shape" means that the lane numbers and widths are the
1989   same as with OP.
1990
1991   For example, Vanilla(Iop_QNarrowBin32Sto16Ux8)
1992                  = Iop_NarrowBin32to16x8,
1993   that is, narrow 8 lanes of 32 bits to 8 lanes of 16 bits, by
1994   dumping the top half of each lane.
1995
1996   So, with that in place, the scheme is simple, and it is simple to
1997   pessimise each lane individually and then apply Vanilla(OP) so as
1998   to get the result in the right "shape".  If the original OP is
1999   QNarrowBinXtoYxZ then we produce
2000
2001   Vanilla(OP)( PCast-X-to-X-x-Z(vatom1), PCast-X-to-X-x-Z(vatom2) )
2002
2003   or for the case when OP is unary (Iop_QNarrowUn*)
2004
2005   Vanilla(OP)( PCast-X-to-X-x-Z(vatom) )
2006*/
2007static
2008IROp vanillaNarrowingOpOfShape ( IROp qnarrowOp )
2009{
2010   switch (qnarrowOp) {
2011      /* Binary: (128, 128) -> 128 */
2012      case Iop_QNarrowBin16Sto8Ux16:
2013      case Iop_QNarrowBin16Sto8Sx16:
2014      case Iop_QNarrowBin16Uto8Ux16:
2015         return Iop_NarrowBin16to8x16;
2016      case Iop_QNarrowBin32Sto16Ux8:
2017      case Iop_QNarrowBin32Sto16Sx8:
2018      case Iop_QNarrowBin32Uto16Ux8:
2019         return Iop_NarrowBin32to16x8;
2020      /* Binary: (64, 64) -> 64 */
2021      case Iop_QNarrowBin32Sto16Sx4:
2022         return Iop_NarrowBin32to16x4;
2023      case Iop_QNarrowBin16Sto8Ux8:
2024      case Iop_QNarrowBin16Sto8Sx8:
2025         return Iop_NarrowBin16to8x8;
2026      /* Unary: 128 -> 64 */
2027      case Iop_QNarrowUn64Uto32Ux2:
2028      case Iop_QNarrowUn64Sto32Sx2:
2029      case Iop_QNarrowUn64Sto32Ux2:
2030         return Iop_NarrowUn64to32x2;
2031      case Iop_QNarrowUn32Uto16Ux4:
2032      case Iop_QNarrowUn32Sto16Sx4:
2033      case Iop_QNarrowUn32Sto16Ux4:
2034         return Iop_NarrowUn32to16x4;
2035      case Iop_QNarrowUn16Uto8Ux8:
2036      case Iop_QNarrowUn16Sto8Sx8:
2037      case Iop_QNarrowUn16Sto8Ux8:
2038         return Iop_NarrowUn16to8x8;
2039      default:
2040         ppIROp(qnarrowOp);
2041         VG_(tool_panic)("vanillaNarrowOpOfShape");
2042   }
2043}
2044
2045static
2046IRAtom* vectorNarrowBinV128 ( MCEnv* mce, IROp narrow_op,
2047                              IRAtom* vatom1, IRAtom* vatom2)
2048{
2049   IRAtom *at1, *at2, *at3;
2050   IRAtom* (*pcast)( MCEnv*, IRAtom* );
2051   switch (narrow_op) {
2052      case Iop_QNarrowBin32Sto16Sx8: pcast = mkPCast32x4; break;
2053      case Iop_QNarrowBin32Uto16Ux8: pcast = mkPCast32x4; break;
2054      case Iop_QNarrowBin32Sto16Ux8: pcast = mkPCast32x4; break;
2055      case Iop_QNarrowBin16Sto8Sx16: pcast = mkPCast16x8; break;
2056      case Iop_QNarrowBin16Uto8Ux16: pcast = mkPCast16x8; break;
2057      case Iop_QNarrowBin16Sto8Ux16: pcast = mkPCast16x8; break;
2058      default: VG_(tool_panic)("vectorNarrowBinV128");
2059   }
2060   IROp vanilla_narrow = vanillaNarrowingOpOfShape(narrow_op);
2061   tl_assert(isShadowAtom(mce,vatom1));
2062   tl_assert(isShadowAtom(mce,vatom2));
2063   at1 = assignNew('V', mce, Ity_V128, pcast(mce, vatom1));
2064   at2 = assignNew('V', mce, Ity_V128, pcast(mce, vatom2));
2065   at3 = assignNew('V', mce, Ity_V128, binop(vanilla_narrow, at1, at2));
2066   return at3;
2067}
2068
2069static
2070IRAtom* vectorNarrowBin64 ( MCEnv* mce, IROp narrow_op,
2071                            IRAtom* vatom1, IRAtom* vatom2)
2072{
2073   IRAtom *at1, *at2, *at3;
2074   IRAtom* (*pcast)( MCEnv*, IRAtom* );
2075   switch (narrow_op) {
2076      case Iop_QNarrowBin32Sto16Sx4: pcast = mkPCast32x2; break;
2077      case Iop_QNarrowBin16Sto8Sx8:  pcast = mkPCast16x4; break;
2078      case Iop_QNarrowBin16Sto8Ux8:  pcast = mkPCast16x4; break;
2079      default: VG_(tool_panic)("vectorNarrowBin64");
2080   }
2081   IROp vanilla_narrow = vanillaNarrowingOpOfShape(narrow_op);
2082   tl_assert(isShadowAtom(mce,vatom1));
2083   tl_assert(isShadowAtom(mce,vatom2));
2084   at1 = assignNew('V', mce, Ity_I64, pcast(mce, vatom1));
2085   at2 = assignNew('V', mce, Ity_I64, pcast(mce, vatom2));
2086   at3 = assignNew('V', mce, Ity_I64, binop(vanilla_narrow, at1, at2));
2087   return at3;
2088}
2089
2090static
2091IRAtom* vectorNarrowUnV128 ( MCEnv* mce, IROp narrow_op,
2092                             IRAtom* vatom1)
2093{
2094   IRAtom *at1, *at2;
2095   IRAtom* (*pcast)( MCEnv*, IRAtom* );
2096   tl_assert(isShadowAtom(mce,vatom1));
2097   /* For vanilla narrowing (non-saturating), we can just apply
2098      the op directly to the V bits. */
2099   switch (narrow_op) {
2100      case Iop_NarrowUn16to8x8:
2101      case Iop_NarrowUn32to16x4:
2102      case Iop_NarrowUn64to32x2:
2103         at1 = assignNew('V', mce, Ity_I64, unop(narrow_op, vatom1));
2104         return at1;
2105      default:
2106         break; /* Do Plan B */
2107   }
2108   /* Plan B: for ops that involve a saturation operation on the args,
2109      we must PCast before the vanilla narrow. */
2110   switch (narrow_op) {
2111      case Iop_QNarrowUn16Sto8Sx8:  pcast = mkPCast16x8; break;
2112      case Iop_QNarrowUn16Sto8Ux8:  pcast = mkPCast16x8; break;
2113      case Iop_QNarrowUn16Uto8Ux8:  pcast = mkPCast16x8; break;
2114      case Iop_QNarrowUn32Sto16Sx4: pcast = mkPCast32x4; break;
2115      case Iop_QNarrowUn32Sto16Ux4: pcast = mkPCast32x4; break;
2116      case Iop_QNarrowUn32Uto16Ux4: pcast = mkPCast32x4; break;
2117      case Iop_QNarrowUn64Sto32Sx2: pcast = mkPCast64x2; break;
2118      case Iop_QNarrowUn64Sto32Ux2: pcast = mkPCast64x2; break;
2119      case Iop_QNarrowUn64Uto32Ux2: pcast = mkPCast64x2; break;
2120      default: VG_(tool_panic)("vectorNarrowUnV128");
2121   }
2122   IROp vanilla_narrow = vanillaNarrowingOpOfShape(narrow_op);
2123   at1 = assignNew('V', mce, Ity_V128, pcast(mce, vatom1));
2124   at2 = assignNew('V', mce, Ity_I64, unop(vanilla_narrow, at1));
2125   return at2;
2126}
2127
2128static
2129IRAtom* vectorWidenI64 ( MCEnv* mce, IROp longen_op,
2130                         IRAtom* vatom1)
2131{
2132   IRAtom *at1, *at2;
2133   IRAtom* (*pcast)( MCEnv*, IRAtom* );
2134   switch (longen_op) {
2135      case Iop_Widen8Uto16x8:  pcast = mkPCast16x8; break;
2136      case Iop_Widen8Sto16x8:  pcast = mkPCast16x8; break;
2137      case Iop_Widen16Uto32x4: pcast = mkPCast32x4; break;
2138      case Iop_Widen16Sto32x4: pcast = mkPCast32x4; break;
2139      case Iop_Widen32Uto64x2: pcast = mkPCast64x2; break;
2140      case Iop_Widen32Sto64x2: pcast = mkPCast64x2; break;
2141      default: VG_(tool_panic)("vectorWidenI64");
2142   }
2143   tl_assert(isShadowAtom(mce,vatom1));
2144   at1 = assignNew('V', mce, Ity_V128, unop(longen_op, vatom1));
2145   at2 = assignNew('V', mce, Ity_V128, pcast(mce, at1));
2146   return at2;
2147}
2148
2149
2150/* --- --- Vector integer arithmetic --- --- */
2151
2152/* Simple ... UifU the args and per-lane pessimise the results. */
2153
2154/* --- V128-bit versions --- */
2155
2156static
2157IRAtom* binary8Ix16 ( MCEnv* mce, IRAtom* vatom1, IRAtom* vatom2 )
2158{
2159   IRAtom* at;
2160   at = mkUifUV128(mce, vatom1, vatom2);
2161   at = mkPCast8x16(mce, at);
2162   return at;
2163}
2164
2165static
2166IRAtom* binary16Ix8 ( MCEnv* mce, IRAtom* vatom1, IRAtom* vatom2 )
2167{
2168   IRAtom* at;
2169   at = mkUifUV128(mce, vatom1, vatom2);
2170   at = mkPCast16x8(mce, at);
2171   return at;
2172}
2173
2174static
2175IRAtom* binary32Ix4 ( MCEnv* mce, IRAtom* vatom1, IRAtom* vatom2 )
2176{
2177   IRAtom* at;
2178   at = mkUifUV128(mce, vatom1, vatom2);
2179   at = mkPCast32x4(mce, at);
2180   return at;
2181}
2182
2183static
2184IRAtom* binary64Ix2 ( MCEnv* mce, IRAtom* vatom1, IRAtom* vatom2 )
2185{
2186   IRAtom* at;
2187   at = mkUifUV128(mce, vatom1, vatom2);
2188   at = mkPCast64x2(mce, at);
2189   return at;
2190}
2191
2192/* --- 64-bit versions --- */
2193
2194static
2195IRAtom* binary8Ix8 ( MCEnv* mce, IRAtom* vatom1, IRAtom* vatom2 )
2196{
2197   IRAtom* at;
2198   at = mkUifU64(mce, vatom1, vatom2);
2199   at = mkPCast8x8(mce, at);
2200   return at;
2201}
2202
2203static
2204IRAtom* binary16Ix4 ( MCEnv* mce, IRAtom* vatom1, IRAtom* vatom2 )
2205{
2206   IRAtom* at;
2207   at = mkUifU64(mce, vatom1, vatom2);
2208   at = mkPCast16x4(mce, at);
2209   return at;
2210}
2211
2212static
2213IRAtom* binary32Ix2 ( MCEnv* mce, IRAtom* vatom1, IRAtom* vatom2 )
2214{
2215   IRAtom* at;
2216   at = mkUifU64(mce, vatom1, vatom2);
2217   at = mkPCast32x2(mce, at);
2218   return at;
2219}
2220
2221static
2222IRAtom* binary64Ix1 ( MCEnv* mce, IRAtom* vatom1, IRAtom* vatom2 )
2223{
2224   IRAtom* at;
2225   at = mkUifU64(mce, vatom1, vatom2);
2226   at = mkPCastTo(mce, Ity_I64, at);
2227   return at;
2228}
2229
2230/* --- 32-bit versions --- */
2231
2232static
2233IRAtom* binary8Ix4 ( MCEnv* mce, IRAtom* vatom1, IRAtom* vatom2 )
2234{
2235   IRAtom* at;
2236   at = mkUifU32(mce, vatom1, vatom2);
2237   at = mkPCast8x4(mce, at);
2238   return at;
2239}
2240
2241static
2242IRAtom* binary16Ix2 ( MCEnv* mce, IRAtom* vatom1, IRAtom* vatom2 )
2243{
2244   IRAtom* at;
2245   at = mkUifU32(mce, vatom1, vatom2);
2246   at = mkPCast16x2(mce, at);
2247   return at;
2248}
2249
2250
2251/*------------------------------------------------------------*/
2252/*--- Generate shadow values from all kinds of IRExprs.    ---*/
2253/*------------------------------------------------------------*/
2254
2255static
2256IRAtom* expr2vbits_Qop ( MCEnv* mce,
2257                         IROp op,
2258                         IRAtom* atom1, IRAtom* atom2,
2259                         IRAtom* atom3, IRAtom* atom4 )
2260{
2261   IRAtom* vatom1 = expr2vbits( mce, atom1 );
2262   IRAtom* vatom2 = expr2vbits( mce, atom2 );
2263   IRAtom* vatom3 = expr2vbits( mce, atom3 );
2264   IRAtom* vatom4 = expr2vbits( mce, atom4 );
2265
2266   tl_assert(isOriginalAtom(mce,atom1));
2267   tl_assert(isOriginalAtom(mce,atom2));
2268   tl_assert(isOriginalAtom(mce,atom3));
2269   tl_assert(isOriginalAtom(mce,atom4));
2270   tl_assert(isShadowAtom(mce,vatom1));
2271   tl_assert(isShadowAtom(mce,vatom2));
2272   tl_assert(isShadowAtom(mce,vatom3));
2273   tl_assert(isShadowAtom(mce,vatom4));
2274   tl_assert(sameKindedAtoms(atom1,vatom1));
2275   tl_assert(sameKindedAtoms(atom2,vatom2));
2276   tl_assert(sameKindedAtoms(atom3,vatom3));
2277   tl_assert(sameKindedAtoms(atom4,vatom4));
2278   switch (op) {
2279      case Iop_MAddF64:
2280      case Iop_MAddF64r32:
2281      case Iop_MSubF64:
2282      case Iop_MSubF64r32:
2283         /* I32(rm) x F64 x F64 x F64 -> F64 */
2284         return mkLazy4(mce, Ity_I64, vatom1, vatom2, vatom3, vatom4);
2285
2286      case Iop_MAddF32:
2287      case Iop_MSubF32:
2288         /* I32(rm) x F32 x F32 x F32 -> F32 */
2289         return mkLazy4(mce, Ity_I32, vatom1, vatom2, vatom3, vatom4);
2290
2291      default:
2292         ppIROp(op);
2293         VG_(tool_panic)("memcheck:expr2vbits_Qop");
2294   }
2295}
2296
2297
2298static
2299IRAtom* expr2vbits_Triop ( MCEnv* mce,
2300                           IROp op,
2301                           IRAtom* atom1, IRAtom* atom2, IRAtom* atom3 )
2302{
2303   IRAtom* vatom1 = expr2vbits( mce, atom1 );
2304   IRAtom* vatom2 = expr2vbits( mce, atom2 );
2305   IRAtom* vatom3 = expr2vbits( mce, atom3 );
2306
2307   tl_assert(isOriginalAtom(mce,atom1));
2308   tl_assert(isOriginalAtom(mce,atom2));
2309   tl_assert(isOriginalAtom(mce,atom3));
2310   tl_assert(isShadowAtom(mce,vatom1));
2311   tl_assert(isShadowAtom(mce,vatom2));
2312   tl_assert(isShadowAtom(mce,vatom3));
2313   tl_assert(sameKindedAtoms(atom1,vatom1));
2314   tl_assert(sameKindedAtoms(atom2,vatom2));
2315   tl_assert(sameKindedAtoms(atom3,vatom3));
2316   switch (op) {
2317      case Iop_AddF128:
2318      case Iop_SubF128:
2319      case Iop_MulF128:
2320      case Iop_DivF128:
2321         /* I32(rm) x F128 x F128 -> F128 */
2322         return mkLazy3(mce, Ity_I128, vatom1, vatom2, vatom3);
2323      case Iop_AddF64:
2324      case Iop_AddF64r32:
2325      case Iop_SubF64:
2326      case Iop_SubF64r32:
2327      case Iop_MulF64:
2328      case Iop_MulF64r32:
2329      case Iop_DivF64:
2330      case Iop_DivF64r32:
2331      case Iop_ScaleF64:
2332      case Iop_Yl2xF64:
2333      case Iop_Yl2xp1F64:
2334      case Iop_AtanF64:
2335      case Iop_PRemF64:
2336      case Iop_PRem1F64:
2337         /* I32(rm) x F64 x F64 -> F64 */
2338         return mkLazy3(mce, Ity_I64, vatom1, vatom2, vatom3);
2339      case Iop_PRemC3210F64:
2340      case Iop_PRem1C3210F64:
2341         /* I32(rm) x F64 x F64 -> I32 */
2342         return mkLazy3(mce, Ity_I32, vatom1, vatom2, vatom3);
2343      case Iop_AddF32:
2344      case Iop_SubF32:
2345      case Iop_MulF32:
2346      case Iop_DivF32:
2347         /* I32(rm) x F32 x F32 -> I32 */
2348         return mkLazy3(mce, Ity_I32, vatom1, vatom2, vatom3);
2349      case Iop_ExtractV128:
2350         complainIfUndefined(mce, atom3);
2351         return assignNew('V', mce, Ity_V128, triop(op, vatom1, vatom2, atom3));
2352      case Iop_Extract64:
2353         complainIfUndefined(mce, atom3);
2354         return assignNew('V', mce, Ity_I64, triop(op, vatom1, vatom2, atom3));
2355      case Iop_SetElem8x8:
2356      case Iop_SetElem16x4:
2357      case Iop_SetElem32x2:
2358         complainIfUndefined(mce, atom2);
2359         return assignNew('V', mce, Ity_I64, triop(op, vatom1, atom2, vatom3));
2360      default:
2361         ppIROp(op);
2362         VG_(tool_panic)("memcheck:expr2vbits_Triop");
2363   }
2364}
2365
2366
2367static
2368IRAtom* expr2vbits_Binop ( MCEnv* mce,
2369                           IROp op,
2370                           IRAtom* atom1, IRAtom* atom2 )
2371{
2372   IRType  and_or_ty;
2373   IRAtom* (*uifu)    (MCEnv*, IRAtom*, IRAtom*);
2374   IRAtom* (*difd)    (MCEnv*, IRAtom*, IRAtom*);
2375   IRAtom* (*improve) (MCEnv*, IRAtom*, IRAtom*);
2376
2377   IRAtom* vatom1 = expr2vbits( mce, atom1 );
2378   IRAtom* vatom2 = expr2vbits( mce, atom2 );
2379
2380   tl_assert(isOriginalAtom(mce,atom1));
2381   tl_assert(isOriginalAtom(mce,atom2));
2382   tl_assert(isShadowAtom(mce,vatom1));
2383   tl_assert(isShadowAtom(mce,vatom2));
2384   tl_assert(sameKindedAtoms(atom1,vatom1));
2385   tl_assert(sameKindedAtoms(atom2,vatom2));
2386   switch (op) {
2387
2388      /* 32-bit SIMD */
2389
2390      case Iop_Add16x2:
2391      case Iop_HAdd16Ux2:
2392      case Iop_HAdd16Sx2:
2393      case Iop_Sub16x2:
2394      case Iop_HSub16Ux2:
2395      case Iop_HSub16Sx2:
2396      case Iop_QAdd16Sx2:
2397      case Iop_QSub16Sx2:
2398         return binary16Ix2(mce, vatom1, vatom2);
2399
2400      case Iop_Add8x4:
2401      case Iop_HAdd8Ux4:
2402      case Iop_HAdd8Sx4:
2403      case Iop_Sub8x4:
2404      case Iop_HSub8Ux4:
2405      case Iop_HSub8Sx4:
2406      case Iop_QSub8Ux4:
2407      case Iop_QAdd8Ux4:
2408      case Iop_QSub8Sx4:
2409      case Iop_QAdd8Sx4:
2410         return binary8Ix4(mce, vatom1, vatom2);
2411
2412      /* 64-bit SIMD */
2413
2414      case Iop_ShrN8x8:
2415      case Iop_ShrN16x4:
2416      case Iop_ShrN32x2:
2417      case Iop_SarN8x8:
2418      case Iop_SarN16x4:
2419      case Iop_SarN32x2:
2420      case Iop_ShlN16x4:
2421      case Iop_ShlN32x2:
2422      case Iop_ShlN8x8:
2423         /* Same scheme as with all other shifts. */
2424         complainIfUndefined(mce, atom2);
2425         return assignNew('V', mce, Ity_I64, binop(op, vatom1, atom2));
2426
2427      case Iop_QNarrowBin32Sto16Sx4:
2428      case Iop_QNarrowBin16Sto8Sx8:
2429      case Iop_QNarrowBin16Sto8Ux8:
2430         return vectorNarrowBin64(mce, op, vatom1, vatom2);
2431
2432      case Iop_Min8Ux8:
2433      case Iop_Min8Sx8:
2434      case Iop_Max8Ux8:
2435      case Iop_Max8Sx8:
2436      case Iop_Avg8Ux8:
2437      case Iop_QSub8Sx8:
2438      case Iop_QSub8Ux8:
2439      case Iop_Sub8x8:
2440      case Iop_CmpGT8Sx8:
2441      case Iop_CmpGT8Ux8:
2442      case Iop_CmpEQ8x8:
2443      case Iop_QAdd8Sx8:
2444      case Iop_QAdd8Ux8:
2445      case Iop_QSal8x8:
2446      case Iop_QShl8x8:
2447      case Iop_Add8x8:
2448      case Iop_Mul8x8:
2449      case Iop_PolynomialMul8x8:
2450         return binary8Ix8(mce, vatom1, vatom2);
2451
2452      case Iop_Min16Sx4:
2453      case Iop_Min16Ux4:
2454      case Iop_Max16Sx4:
2455      case Iop_Max16Ux4:
2456      case Iop_Avg16Ux4:
2457      case Iop_QSub16Ux4:
2458      case Iop_QSub16Sx4:
2459      case Iop_Sub16x4:
2460      case Iop_Mul16x4:
2461      case Iop_MulHi16Sx4:
2462      case Iop_MulHi16Ux4:
2463      case Iop_CmpGT16Sx4:
2464      case Iop_CmpGT16Ux4:
2465      case Iop_CmpEQ16x4:
2466      case Iop_QAdd16Sx4:
2467      case Iop_QAdd16Ux4:
2468      case Iop_QSal16x4:
2469      case Iop_QShl16x4:
2470      case Iop_Add16x4:
2471      case Iop_QDMulHi16Sx4:
2472      case Iop_QRDMulHi16Sx4:
2473         return binary16Ix4(mce, vatom1, vatom2);
2474
2475      case Iop_Sub32x2:
2476      case Iop_Mul32x2:
2477      case Iop_Max32Sx2:
2478      case Iop_Max32Ux2:
2479      case Iop_Min32Sx2:
2480      case Iop_Min32Ux2:
2481      case Iop_CmpGT32Sx2:
2482      case Iop_CmpGT32Ux2:
2483      case Iop_CmpEQ32x2:
2484      case Iop_Add32x2:
2485      case Iop_QAdd32Ux2:
2486      case Iop_QAdd32Sx2:
2487      case Iop_QSub32Ux2:
2488      case Iop_QSub32Sx2:
2489      case Iop_QSal32x2:
2490      case Iop_QShl32x2:
2491      case Iop_QDMulHi32Sx2:
2492      case Iop_QRDMulHi32Sx2:
2493         return binary32Ix2(mce, vatom1, vatom2);
2494
2495      case Iop_QSub64Ux1:
2496      case Iop_QSub64Sx1:
2497      case Iop_QAdd64Ux1:
2498      case Iop_QAdd64Sx1:
2499      case Iop_QSal64x1:
2500      case Iop_QShl64x1:
2501      case Iop_Sal64x1:
2502         return binary64Ix1(mce, vatom1, vatom2);
2503
2504      case Iop_QShlN8Sx8:
2505      case Iop_QShlN8x8:
2506      case Iop_QSalN8x8:
2507         complainIfUndefined(mce, atom2);
2508         return mkPCast8x8(mce, vatom1);
2509
2510      case Iop_QShlN16Sx4:
2511      case Iop_QShlN16x4:
2512      case Iop_QSalN16x4:
2513         complainIfUndefined(mce, atom2);
2514         return mkPCast16x4(mce, vatom1);
2515
2516      case Iop_QShlN32Sx2:
2517      case Iop_QShlN32x2:
2518      case Iop_QSalN32x2:
2519         complainIfUndefined(mce, atom2);
2520         return mkPCast32x2(mce, vatom1);
2521
2522      case Iop_QShlN64Sx1:
2523      case Iop_QShlN64x1:
2524      case Iop_QSalN64x1:
2525         complainIfUndefined(mce, atom2);
2526         return mkPCast32x2(mce, vatom1);
2527
2528      case Iop_PwMax32Sx2:
2529      case Iop_PwMax32Ux2:
2530      case Iop_PwMin32Sx2:
2531      case Iop_PwMin32Ux2:
2532      case Iop_PwMax32Fx2:
2533      case Iop_PwMin32Fx2:
2534         return assignNew('V', mce, Ity_I64, binop(Iop_PwMax32Ux2, mkPCast32x2(mce, vatom1),
2535                     mkPCast32x2(mce, vatom2)));
2536
2537      case Iop_PwMax16Sx4:
2538      case Iop_PwMax16Ux4:
2539      case Iop_PwMin16Sx4:
2540      case Iop_PwMin16Ux4:
2541         return assignNew('V', mce, Ity_I64, binop(Iop_PwMax16Ux4, mkPCast16x4(mce, vatom1),
2542                     mkPCast16x4(mce, vatom2)));
2543
2544      case Iop_PwMax8Sx8:
2545      case Iop_PwMax8Ux8:
2546      case Iop_PwMin8Sx8:
2547      case Iop_PwMin8Ux8:
2548         return assignNew('V', mce, Ity_I64, binop(Iop_PwMax8Ux8, mkPCast8x8(mce, vatom1),
2549                     mkPCast8x8(mce, vatom2)));
2550
2551      case Iop_PwAdd32x2:
2552      case Iop_PwAdd32Fx2:
2553         return mkPCast32x2(mce,
2554               assignNew('V', mce, Ity_I64, binop(Iop_PwAdd32x2, mkPCast32x2(mce, vatom1),
2555                     mkPCast32x2(mce, vatom2))));
2556
2557      case Iop_PwAdd16x4:
2558         return mkPCast16x4(mce,
2559               assignNew('V', mce, Ity_I64, binop(op, mkPCast16x4(mce, vatom1),
2560                     mkPCast16x4(mce, vatom2))));
2561
2562      case Iop_PwAdd8x8:
2563         return mkPCast8x8(mce,
2564               assignNew('V', mce, Ity_I64, binop(op, mkPCast8x8(mce, vatom1),
2565                     mkPCast8x8(mce, vatom2))));
2566
2567      case Iop_Shl8x8:
2568      case Iop_Shr8x8:
2569      case Iop_Sar8x8:
2570      case Iop_Sal8x8:
2571         return mkUifU64(mce,
2572                   assignNew('V', mce, Ity_I64, binop(op, vatom1, atom2)),
2573                   mkPCast8x8(mce,vatom2)
2574                );
2575
2576      case Iop_Shl16x4:
2577      case Iop_Shr16x4:
2578      case Iop_Sar16x4:
2579      case Iop_Sal16x4:
2580         return mkUifU64(mce,
2581                   assignNew('V', mce, Ity_I64, binop(op, vatom1, atom2)),
2582                   mkPCast16x4(mce,vatom2)
2583                );
2584
2585      case Iop_Shl32x2:
2586      case Iop_Shr32x2:
2587      case Iop_Sar32x2:
2588      case Iop_Sal32x2:
2589         return mkUifU64(mce,
2590                   assignNew('V', mce, Ity_I64, binop(op, vatom1, atom2)),
2591                   mkPCast32x2(mce,vatom2)
2592                );
2593
2594      /* 64-bit data-steering */
2595      case Iop_InterleaveLO32x2:
2596      case Iop_InterleaveLO16x4:
2597      case Iop_InterleaveLO8x8:
2598      case Iop_InterleaveHI32x2:
2599      case Iop_InterleaveHI16x4:
2600      case Iop_InterleaveHI8x8:
2601      case Iop_CatOddLanes8x8:
2602      case Iop_CatEvenLanes8x8:
2603      case Iop_CatOddLanes16x4:
2604      case Iop_CatEvenLanes16x4:
2605      case Iop_InterleaveOddLanes8x8:
2606      case Iop_InterleaveEvenLanes8x8:
2607      case Iop_InterleaveOddLanes16x4:
2608      case Iop_InterleaveEvenLanes16x4:
2609         return assignNew('V', mce, Ity_I64, binop(op, vatom1, vatom2));
2610
2611      case Iop_GetElem8x8:
2612         complainIfUndefined(mce, atom2);
2613         return assignNew('V', mce, Ity_I8, binop(op, vatom1, atom2));
2614      case Iop_GetElem16x4:
2615         complainIfUndefined(mce, atom2);
2616         return assignNew('V', mce, Ity_I16, binop(op, vatom1, atom2));
2617      case Iop_GetElem32x2:
2618         complainIfUndefined(mce, atom2);
2619         return assignNew('V', mce, Ity_I32, binop(op, vatom1, atom2));
2620
2621      /* Perm8x8: rearrange values in left arg using steering values
2622        from right arg.  So rearrange the vbits in the same way but
2623        pessimise wrt steering values. */
2624      case Iop_Perm8x8:
2625         return mkUifU64(
2626                   mce,
2627                   assignNew('V', mce, Ity_I64, binop(op, vatom1, atom2)),
2628                   mkPCast8x8(mce, vatom2)
2629                );
2630
2631      /* V128-bit SIMD */
2632
2633      case Iop_ShrN8x16:
2634      case Iop_ShrN16x8:
2635      case Iop_ShrN32x4:
2636      case Iop_ShrN64x2:
2637      case Iop_SarN8x16:
2638      case Iop_SarN16x8:
2639      case Iop_SarN32x4:
2640      case Iop_SarN64x2:
2641      case Iop_ShlN8x16:
2642      case Iop_ShlN16x8:
2643      case Iop_ShlN32x4:
2644      case Iop_ShlN64x2:
2645         /* Same scheme as with all other shifts.  Note: 22 Oct 05:
2646            this is wrong now, scalar shifts are done properly lazily.
2647            Vector shifts should be fixed too. */
2648         complainIfUndefined(mce, atom2);
2649         return assignNew('V', mce, Ity_V128, binop(op, vatom1, atom2));
2650
2651      /* V x V shifts/rotates are done using the standard lazy scheme. */
2652      case Iop_Shl8x16:
2653      case Iop_Shr8x16:
2654      case Iop_Sar8x16:
2655      case Iop_Sal8x16:
2656      case Iop_Rol8x16:
2657         return mkUifUV128(mce,
2658                   assignNew('V', mce, Ity_V128, binop(op, vatom1, atom2)),
2659                   mkPCast8x16(mce,vatom2)
2660                );
2661
2662      case Iop_Shl16x8:
2663      case Iop_Shr16x8:
2664      case Iop_Sar16x8:
2665      case Iop_Sal16x8:
2666      case Iop_Rol16x8:
2667         return mkUifUV128(mce,
2668                   assignNew('V', mce, Ity_V128, binop(op, vatom1, atom2)),
2669                   mkPCast16x8(mce,vatom2)
2670                );
2671
2672      case Iop_Shl32x4:
2673      case Iop_Shr32x4:
2674      case Iop_Sar32x4:
2675      case Iop_Sal32x4:
2676      case Iop_Rol32x4:
2677         return mkUifUV128(mce,
2678                   assignNew('V', mce, Ity_V128, binop(op, vatom1, atom2)),
2679                   mkPCast32x4(mce,vatom2)
2680                );
2681
2682      case Iop_Shl64x2:
2683      case Iop_Shr64x2:
2684      case Iop_Sar64x2:
2685      case Iop_Sal64x2:
2686         return mkUifUV128(mce,
2687                   assignNew('V', mce, Ity_V128, binop(op, vatom1, atom2)),
2688                   mkPCast64x2(mce,vatom2)
2689                );
2690
2691      case Iop_F32ToFixed32Ux4_RZ:
2692      case Iop_F32ToFixed32Sx4_RZ:
2693      case Iop_Fixed32UToF32x4_RN:
2694      case Iop_Fixed32SToF32x4_RN:
2695         complainIfUndefined(mce, atom2);
2696         return mkPCast32x4(mce, vatom1);
2697
2698      case Iop_F32ToFixed32Ux2_RZ:
2699      case Iop_F32ToFixed32Sx2_RZ:
2700      case Iop_Fixed32UToF32x2_RN:
2701      case Iop_Fixed32SToF32x2_RN:
2702         complainIfUndefined(mce, atom2);
2703         return mkPCast32x2(mce, vatom1);
2704
2705      case Iop_QSub8Ux16:
2706      case Iop_QSub8Sx16:
2707      case Iop_Sub8x16:
2708      case Iop_Min8Ux16:
2709      case Iop_Min8Sx16:
2710      case Iop_Max8Ux16:
2711      case Iop_Max8Sx16:
2712      case Iop_CmpGT8Sx16:
2713      case Iop_CmpGT8Ux16:
2714      case Iop_CmpEQ8x16:
2715      case Iop_Avg8Ux16:
2716      case Iop_Avg8Sx16:
2717      case Iop_QAdd8Ux16:
2718      case Iop_QAdd8Sx16:
2719      case Iop_QSal8x16:
2720      case Iop_QShl8x16:
2721      case Iop_Add8x16:
2722      case Iop_Mul8x16:
2723      case Iop_PolynomialMul8x16:
2724         return binary8Ix16(mce, vatom1, vatom2);
2725
2726      case Iop_QSub16Ux8:
2727      case Iop_QSub16Sx8:
2728      case Iop_Sub16x8:
2729      case Iop_Mul16x8:
2730      case Iop_MulHi16Sx8:
2731      case Iop_MulHi16Ux8:
2732      case Iop_Min16Sx8:
2733      case Iop_Min16Ux8:
2734      case Iop_Max16Sx8:
2735      case Iop_Max16Ux8:
2736      case Iop_CmpGT16Sx8:
2737      case Iop_CmpGT16Ux8:
2738      case Iop_CmpEQ16x8:
2739      case Iop_Avg16Ux8:
2740      case Iop_Avg16Sx8:
2741      case Iop_QAdd16Ux8:
2742      case Iop_QAdd16Sx8:
2743      case Iop_QSal16x8:
2744      case Iop_QShl16x8:
2745      case Iop_Add16x8:
2746      case Iop_QDMulHi16Sx8:
2747      case Iop_QRDMulHi16Sx8:
2748         return binary16Ix8(mce, vatom1, vatom2);
2749
2750      case Iop_Sub32x4:
2751      case Iop_CmpGT32Sx4:
2752      case Iop_CmpGT32Ux4:
2753      case Iop_CmpEQ32x4:
2754      case Iop_QAdd32Sx4:
2755      case Iop_QAdd32Ux4:
2756      case Iop_QSub32Sx4:
2757      case Iop_QSub32Ux4:
2758      case Iop_QSal32x4:
2759      case Iop_QShl32x4:
2760      case Iop_Avg32Ux4:
2761      case Iop_Avg32Sx4:
2762      case Iop_Add32x4:
2763      case Iop_Max32Ux4:
2764      case Iop_Max32Sx4:
2765      case Iop_Min32Ux4:
2766      case Iop_Min32Sx4:
2767      case Iop_Mul32x4:
2768      case Iop_QDMulHi32Sx4:
2769      case Iop_QRDMulHi32Sx4:
2770         return binary32Ix4(mce, vatom1, vatom2);
2771
2772      case Iop_Sub64x2:
2773      case Iop_Add64x2:
2774      case Iop_CmpEQ64x2:
2775      case Iop_CmpGT64Sx2:
2776      case Iop_QSal64x2:
2777      case Iop_QShl64x2:
2778      case Iop_QAdd64Ux2:
2779      case Iop_QAdd64Sx2:
2780      case Iop_QSub64Ux2:
2781      case Iop_QSub64Sx2:
2782         return binary64Ix2(mce, vatom1, vatom2);
2783
2784      case Iop_QNarrowBin32Sto16Sx8:
2785      case Iop_QNarrowBin32Uto16Ux8:
2786      case Iop_QNarrowBin32Sto16Ux8:
2787      case Iop_QNarrowBin16Sto8Sx16:
2788      case Iop_QNarrowBin16Uto8Ux16:
2789      case Iop_QNarrowBin16Sto8Ux16:
2790         return vectorNarrowBinV128(mce, op, vatom1, vatom2);
2791
2792      case Iop_Sub64Fx2:
2793      case Iop_Mul64Fx2:
2794      case Iop_Min64Fx2:
2795      case Iop_Max64Fx2:
2796      case Iop_Div64Fx2:
2797      case Iop_CmpLT64Fx2:
2798      case Iop_CmpLE64Fx2:
2799      case Iop_CmpEQ64Fx2:
2800      case Iop_CmpUN64Fx2:
2801      case Iop_Add64Fx2:
2802         return binary64Fx2(mce, vatom1, vatom2);
2803
2804      case Iop_Sub64F0x2:
2805      case Iop_Mul64F0x2:
2806      case Iop_Min64F0x2:
2807      case Iop_Max64F0x2:
2808      case Iop_Div64F0x2:
2809      case Iop_CmpLT64F0x2:
2810      case Iop_CmpLE64F0x2:
2811      case Iop_CmpEQ64F0x2:
2812      case Iop_CmpUN64F0x2:
2813      case Iop_Add64F0x2:
2814         return binary64F0x2(mce, vatom1, vatom2);
2815
2816      case Iop_Sub32Fx4:
2817      case Iop_Mul32Fx4:
2818      case Iop_Min32Fx4:
2819      case Iop_Max32Fx4:
2820      case Iop_Div32Fx4:
2821      case Iop_CmpLT32Fx4:
2822      case Iop_CmpLE32Fx4:
2823      case Iop_CmpEQ32Fx4:
2824      case Iop_CmpUN32Fx4:
2825      case Iop_CmpGT32Fx4:
2826      case Iop_CmpGE32Fx4:
2827      case Iop_Add32Fx4:
2828      case Iop_Recps32Fx4:
2829      case Iop_Rsqrts32Fx4:
2830         return binary32Fx4(mce, vatom1, vatom2);
2831
2832      case Iop_Sub32Fx2:
2833      case Iop_Mul32Fx2:
2834      case Iop_Min32Fx2:
2835      case Iop_Max32Fx2:
2836      case Iop_CmpEQ32Fx2:
2837      case Iop_CmpGT32Fx2:
2838      case Iop_CmpGE32Fx2:
2839      case Iop_Add32Fx2:
2840      case Iop_Recps32Fx2:
2841      case Iop_Rsqrts32Fx2:
2842         return binary32Fx2(mce, vatom1, vatom2);
2843
2844      case Iop_Sub32F0x4:
2845      case Iop_Mul32F0x4:
2846      case Iop_Min32F0x4:
2847      case Iop_Max32F0x4:
2848      case Iop_Div32F0x4:
2849      case Iop_CmpLT32F0x4:
2850      case Iop_CmpLE32F0x4:
2851      case Iop_CmpEQ32F0x4:
2852      case Iop_CmpUN32F0x4:
2853      case Iop_Add32F0x4:
2854         return binary32F0x4(mce, vatom1, vatom2);
2855
2856      case Iop_QShlN8Sx16:
2857      case Iop_QShlN8x16:
2858      case Iop_QSalN8x16:
2859         complainIfUndefined(mce, atom2);
2860         return mkPCast8x16(mce, vatom1);
2861
2862      case Iop_QShlN16Sx8:
2863      case Iop_QShlN16x8:
2864      case Iop_QSalN16x8:
2865         complainIfUndefined(mce, atom2);
2866         return mkPCast16x8(mce, vatom1);
2867
2868      case Iop_QShlN32Sx4:
2869      case Iop_QShlN32x4:
2870      case Iop_QSalN32x4:
2871         complainIfUndefined(mce, atom2);
2872         return mkPCast32x4(mce, vatom1);
2873
2874      case Iop_QShlN64Sx2:
2875      case Iop_QShlN64x2:
2876      case Iop_QSalN64x2:
2877         complainIfUndefined(mce, atom2);
2878         return mkPCast32x4(mce, vatom1);
2879
2880      case Iop_Mull32Sx2:
2881      case Iop_Mull32Ux2:
2882      case Iop_QDMulLong32Sx2:
2883         return vectorWidenI64(mce, Iop_Widen32Sto64x2,
2884                                    mkUifU64(mce, vatom1, vatom2));
2885
2886      case Iop_Mull16Sx4:
2887      case Iop_Mull16Ux4:
2888      case Iop_QDMulLong16Sx4:
2889         return vectorWidenI64(mce, Iop_Widen16Sto32x4,
2890                                    mkUifU64(mce, vatom1, vatom2));
2891
2892      case Iop_Mull8Sx8:
2893      case Iop_Mull8Ux8:
2894      case Iop_PolynomialMull8x8:
2895         return vectorWidenI64(mce, Iop_Widen8Sto16x8,
2896                                    mkUifU64(mce, vatom1, vatom2));
2897
2898      case Iop_PwAdd32x4:
2899         return mkPCast32x4(mce,
2900               assignNew('V', mce, Ity_V128, binop(op, mkPCast32x4(mce, vatom1),
2901                     mkPCast32x4(mce, vatom2))));
2902
2903      case Iop_PwAdd16x8:
2904         return mkPCast16x8(mce,
2905               assignNew('V', mce, Ity_V128, binop(op, mkPCast16x8(mce, vatom1),
2906                     mkPCast16x8(mce, vatom2))));
2907
2908      case Iop_PwAdd8x16:
2909         return mkPCast8x16(mce,
2910               assignNew('V', mce, Ity_V128, binop(op, mkPCast8x16(mce, vatom1),
2911                     mkPCast8x16(mce, vatom2))));
2912
2913      /* V128-bit data-steering */
2914      case Iop_SetV128lo32:
2915      case Iop_SetV128lo64:
2916      case Iop_64HLtoV128:
2917      case Iop_InterleaveLO64x2:
2918      case Iop_InterleaveLO32x4:
2919      case Iop_InterleaveLO16x8:
2920      case Iop_InterleaveLO8x16:
2921      case Iop_InterleaveHI64x2:
2922      case Iop_InterleaveHI32x4:
2923      case Iop_InterleaveHI16x8:
2924      case Iop_InterleaveHI8x16:
2925      case Iop_CatOddLanes8x16:
2926      case Iop_CatOddLanes16x8:
2927      case Iop_CatOddLanes32x4:
2928      case Iop_CatEvenLanes8x16:
2929      case Iop_CatEvenLanes16x8:
2930      case Iop_CatEvenLanes32x4:
2931      case Iop_InterleaveOddLanes8x16:
2932      case Iop_InterleaveOddLanes16x8:
2933      case Iop_InterleaveOddLanes32x4:
2934      case Iop_InterleaveEvenLanes8x16:
2935      case Iop_InterleaveEvenLanes16x8:
2936      case Iop_InterleaveEvenLanes32x4:
2937         return assignNew('V', mce, Ity_V128, binop(op, vatom1, vatom2));
2938
2939      case Iop_GetElem8x16:
2940         complainIfUndefined(mce, atom2);
2941         return assignNew('V', mce, Ity_I8, binop(op, vatom1, atom2));
2942      case Iop_GetElem16x8:
2943         complainIfUndefined(mce, atom2);
2944         return assignNew('V', mce, Ity_I16, binop(op, vatom1, atom2));
2945      case Iop_GetElem32x4:
2946         complainIfUndefined(mce, atom2);
2947         return assignNew('V', mce, Ity_I32, binop(op, vatom1, atom2));
2948      case Iop_GetElem64x2:
2949         complainIfUndefined(mce, atom2);
2950         return assignNew('V', mce, Ity_I64, binop(op, vatom1, atom2));
2951
2952     /* Perm8x16: rearrange values in left arg using steering values
2953        from right arg.  So rearrange the vbits in the same way but
2954        pessimise wrt steering values. */
2955      case Iop_Perm8x16:
2956         return mkUifUV128(
2957                   mce,
2958                   assignNew('V', mce, Ity_V128, binop(op, vatom1, atom2)),
2959                   mkPCast8x16(mce, vatom2)
2960                );
2961
2962     /* These two take the lower half of each 16-bit lane, sign/zero
2963        extend it to 32, and multiply together, producing a 32x4
2964        result (and implicitly ignoring half the operand bits).  So
2965        treat it as a bunch of independent 16x8 operations, but then
2966        do 32-bit shifts left-right to copy the lower half results
2967        (which are all 0s or all 1s due to PCasting in binary16Ix8)
2968        into the upper half of each result lane. */
2969      case Iop_MullEven16Ux8:
2970      case Iop_MullEven16Sx8: {
2971         IRAtom* at;
2972         at = binary16Ix8(mce,vatom1,vatom2);
2973         at = assignNew('V', mce, Ity_V128, binop(Iop_ShlN32x4, at, mkU8(16)));
2974         at = assignNew('V', mce, Ity_V128, binop(Iop_SarN32x4, at, mkU8(16)));
2975	 return at;
2976      }
2977
2978      /* Same deal as Iop_MullEven16{S,U}x8 */
2979      case Iop_MullEven8Ux16:
2980      case Iop_MullEven8Sx16: {
2981         IRAtom* at;
2982         at = binary8Ix16(mce,vatom1,vatom2);
2983         at = assignNew('V', mce, Ity_V128, binop(Iop_ShlN16x8, at, mkU8(8)));
2984         at = assignNew('V', mce, Ity_V128, binop(Iop_SarN16x8, at, mkU8(8)));
2985	 return at;
2986      }
2987
2988      /* narrow 2xV128 into 1xV128, hi half from left arg, in a 2 x
2989         32x4 -> 16x8 laneage, discarding the upper half of each lane.
2990         Simply apply same op to the V bits, since this really no more
2991         than a data steering operation. */
2992      case Iop_NarrowBin32to16x8:
2993      case Iop_NarrowBin16to8x16:
2994         return assignNew('V', mce, Ity_V128,
2995                                    binop(op, vatom1, vatom2));
2996
2997      case Iop_ShrV128:
2998      case Iop_ShlV128:
2999         /* Same scheme as with all other shifts.  Note: 10 Nov 05:
3000            this is wrong now, scalar shifts are done properly lazily.
3001            Vector shifts should be fixed too. */
3002         complainIfUndefined(mce, atom2);
3003         return assignNew('V', mce, Ity_V128, binop(op, vatom1, atom2));
3004
3005      /* I128-bit data-steering */
3006      case Iop_64HLto128:
3007         return assignNew('V', mce, Ity_I128, binop(op, vatom1, vatom2));
3008
3009      /* Scalar floating point */
3010
3011      case Iop_F32toI64S:
3012         /* I32(rm) x F32 -> I64 */
3013         return mkLazy2(mce, Ity_I64, vatom1, vatom2);
3014
3015      case Iop_I64StoF32:
3016         /* I32(rm) x I64 -> F32 */
3017         return mkLazy2(mce, Ity_I32, vatom1, vatom2);
3018
3019      case Iop_RoundF64toInt:
3020      case Iop_RoundF64toF32:
3021      case Iop_F64toI64S:
3022      case Iop_F64toI64U:
3023      case Iop_I64StoF64:
3024      case Iop_I64UtoF64:
3025      case Iop_SinF64:
3026      case Iop_CosF64:
3027      case Iop_TanF64:
3028      case Iop_2xm1F64:
3029      case Iop_SqrtF64:
3030         /* I32(rm) x I64/F64 -> I64/F64 */
3031         return mkLazy2(mce, Ity_I64, vatom1, vatom2);
3032
3033      case Iop_RoundF32toInt:
3034      case Iop_SqrtF32:
3035         /* I32(rm) x I32/F32 -> I32/F32 */
3036         return mkLazy2(mce, Ity_I32, vatom1, vatom2);
3037
3038      case Iop_SqrtF128:
3039         /* I32(rm) x F128 -> F128 */
3040         return mkLazy2(mce, Ity_I128, vatom1, vatom2);
3041
3042      case Iop_I32StoF32:
3043      case Iop_F32toI32S:
3044         /* First arg is I32 (rounding mode), second is F32/I32 (data). */
3045         return mkLazy2(mce, Ity_I32, vatom1, vatom2);
3046
3047      case Iop_F128toI32S: /* IRRoundingMode(I32) x F128 -> signed I32  */
3048      case Iop_F128toF32:  /* IRRoundingMode(I32) x F128 -> F32         */
3049         return mkLazy2(mce, Ity_I32, vatom1, vatom2);
3050
3051      case Iop_F128toI64S: /* IRRoundingMode(I32) x F128 -> signed I64  */
3052      case Iop_F128toF64:  /* IRRoundingMode(I32) x F128 -> F64         */
3053         return mkLazy2(mce, Ity_I64, vatom1, vatom2);
3054
3055      case Iop_F64HLtoF128:
3056         return assignNew('V', mce, Ity_I128, binop(Iop_64HLto128, vatom1, vatom2));
3057
3058      case Iop_F64toI32U:
3059      case Iop_F64toI32S:
3060      case Iop_F64toF32:
3061      case Iop_I64UtoF32:
3062         /* First arg is I32 (rounding mode), second is F64 (data). */
3063         return mkLazy2(mce, Ity_I32, vatom1, vatom2);
3064
3065      case Iop_F64toI16S:
3066         /* First arg is I32 (rounding mode), second is F64 (data). */
3067         return mkLazy2(mce, Ity_I16, vatom1, vatom2);
3068
3069      case Iop_CmpF32:
3070      case Iop_CmpF64:
3071      case Iop_CmpF128:
3072         return mkLazy2(mce, Ity_I32, vatom1, vatom2);
3073
3074      /* non-FP after here */
3075
3076      case Iop_DivModU64to32:
3077      case Iop_DivModS64to32:
3078         return mkLazy2(mce, Ity_I64, vatom1, vatom2);
3079
3080      case Iop_DivModU128to64:
3081      case Iop_DivModS128to64:
3082         return mkLazy2(mce, Ity_I128, vatom1, vatom2);
3083
3084      case Iop_16HLto32:
3085         return assignNew('V', mce, Ity_I32, binop(op, vatom1, vatom2));
3086      case Iop_32HLto64:
3087         return assignNew('V', mce, Ity_I64, binop(op, vatom1, vatom2));
3088
3089      case Iop_DivModS64to64:
3090      case Iop_MullS64:
3091      case Iop_MullU64: {
3092         IRAtom* vLo64 = mkLeft64(mce, mkUifU64(mce, vatom1,vatom2));
3093         IRAtom* vHi64 = mkPCastTo(mce, Ity_I64, vLo64);
3094         return assignNew('V', mce, Ity_I128, binop(Iop_64HLto128, vHi64, vLo64));
3095      }
3096
3097      case Iop_MullS32:
3098      case Iop_MullU32: {
3099         IRAtom* vLo32 = mkLeft32(mce, mkUifU32(mce, vatom1,vatom2));
3100         IRAtom* vHi32 = mkPCastTo(mce, Ity_I32, vLo32);
3101         return assignNew('V', mce, Ity_I64, binop(Iop_32HLto64, vHi32, vLo32));
3102      }
3103
3104      case Iop_MullS16:
3105      case Iop_MullU16: {
3106         IRAtom* vLo16 = mkLeft16(mce, mkUifU16(mce, vatom1,vatom2));
3107         IRAtom* vHi16 = mkPCastTo(mce, Ity_I16, vLo16);
3108         return assignNew('V', mce, Ity_I32, binop(Iop_16HLto32, vHi16, vLo16));
3109      }
3110
3111      case Iop_MullS8:
3112      case Iop_MullU8: {
3113         IRAtom* vLo8 = mkLeft8(mce, mkUifU8(mce, vatom1,vatom2));
3114         IRAtom* vHi8 = mkPCastTo(mce, Ity_I8, vLo8);
3115         return assignNew('V', mce, Ity_I16, binop(Iop_8HLto16, vHi8, vLo8));
3116      }
3117
3118      case Iop_Sad8Ux4: /* maybe we could do better?  ftm, do mkLazy2. */
3119      case Iop_DivS32:
3120      case Iop_DivU32:
3121      case Iop_DivU32E:
3122      case Iop_DivS32E:
3123         return mkLazy2(mce, Ity_I32, vatom1, vatom2);
3124
3125      case Iop_DivS64:
3126      case Iop_DivU64:
3127      case Iop_DivS64E:
3128      case Iop_DivU64E:
3129         return mkLazy2(mce, Ity_I64, vatom1, vatom2);
3130
3131      case Iop_Add32:
3132         if (mce->bogusLiterals)
3133            return expensiveAddSub(mce,True,Ity_I32,
3134                                   vatom1,vatom2, atom1,atom2);
3135         else
3136            goto cheap_AddSub32;
3137      case Iop_Sub32:
3138         if (mce->bogusLiterals)
3139            return expensiveAddSub(mce,False,Ity_I32,
3140                                   vatom1,vatom2, atom1,atom2);
3141         else
3142            goto cheap_AddSub32;
3143
3144      cheap_AddSub32:
3145      case Iop_Mul32:
3146         return mkLeft32(mce, mkUifU32(mce, vatom1,vatom2));
3147
3148      case Iop_CmpORD32S:
3149      case Iop_CmpORD32U:
3150      case Iop_CmpORD64S:
3151      case Iop_CmpORD64U:
3152         return doCmpORD(mce, op, vatom1,vatom2, atom1,atom2);
3153
3154      case Iop_Add64:
3155         if (mce->bogusLiterals)
3156            return expensiveAddSub(mce,True,Ity_I64,
3157                                   vatom1,vatom2, atom1,atom2);
3158         else
3159            goto cheap_AddSub64;
3160      case Iop_Sub64:
3161         if (mce->bogusLiterals)
3162            return expensiveAddSub(mce,False,Ity_I64,
3163                                   vatom1,vatom2, atom1,atom2);
3164         else
3165            goto cheap_AddSub64;
3166
3167      cheap_AddSub64:
3168      case Iop_Mul64:
3169         return mkLeft64(mce, mkUifU64(mce, vatom1,vatom2));
3170
3171      case Iop_Mul16:
3172      case Iop_Add16:
3173      case Iop_Sub16:
3174         return mkLeft16(mce, mkUifU16(mce, vatom1,vatom2));
3175
3176      case Iop_Sub8:
3177      case Iop_Add8:
3178         return mkLeft8(mce, mkUifU8(mce, vatom1,vatom2));
3179
3180      case Iop_CmpEQ64:
3181      case Iop_CmpNE64:
3182         if (mce->bogusLiterals)
3183            return expensiveCmpEQorNE(mce,Ity_I64, vatom1,vatom2, atom1,atom2 );
3184         else
3185            goto cheap_cmp64;
3186      cheap_cmp64:
3187      case Iop_CmpLE64S: case Iop_CmpLE64U:
3188      case Iop_CmpLT64U: case Iop_CmpLT64S:
3189         return mkPCastTo(mce, Ity_I1, mkUifU64(mce, vatom1,vatom2));
3190
3191      case Iop_CmpEQ32:
3192      case Iop_CmpNE32:
3193         if (mce->bogusLiterals)
3194            return expensiveCmpEQorNE(mce,Ity_I32, vatom1,vatom2, atom1,atom2 );
3195         else
3196            goto cheap_cmp32;
3197      cheap_cmp32:
3198      case Iop_CmpLE32S: case Iop_CmpLE32U:
3199      case Iop_CmpLT32U: case Iop_CmpLT32S:
3200         return mkPCastTo(mce, Ity_I1, mkUifU32(mce, vatom1,vatom2));
3201
3202      case Iop_CmpEQ16: case Iop_CmpNE16:
3203         return mkPCastTo(mce, Ity_I1, mkUifU16(mce, vatom1,vatom2));
3204
3205      case Iop_CmpEQ8: case Iop_CmpNE8:
3206         return mkPCastTo(mce, Ity_I1, mkUifU8(mce, vatom1,vatom2));
3207
3208      case Iop_CasCmpEQ8:  case Iop_CasCmpNE8:
3209      case Iop_CasCmpEQ16: case Iop_CasCmpNE16:
3210      case Iop_CasCmpEQ32: case Iop_CasCmpNE32:
3211      case Iop_CasCmpEQ64: case Iop_CasCmpNE64:
3212         /* Just say these all produce a defined result, regardless
3213            of their arguments.  See COMMENT_ON_CasCmpEQ in this file. */
3214         return assignNew('V', mce, Ity_I1, definedOfType(Ity_I1));
3215
3216      case Iop_Shl64: case Iop_Shr64: case Iop_Sar64:
3217         return scalarShift( mce, Ity_I64, op, vatom1,vatom2, atom1,atom2 );
3218
3219      case Iop_Shl32: case Iop_Shr32: case Iop_Sar32:
3220         return scalarShift( mce, Ity_I32, op, vatom1,vatom2, atom1,atom2 );
3221
3222      case Iop_Shl16: case Iop_Shr16: case Iop_Sar16:
3223         return scalarShift( mce, Ity_I16, op, vatom1,vatom2, atom1,atom2 );
3224
3225      case Iop_Shl8: case Iop_Shr8:
3226         return scalarShift( mce, Ity_I8, op, vatom1,vatom2, atom1,atom2 );
3227
3228      case Iop_AndV128:
3229         uifu = mkUifUV128; difd = mkDifDV128;
3230         and_or_ty = Ity_V128; improve = mkImproveANDV128; goto do_And_Or;
3231      case Iop_And64:
3232         uifu = mkUifU64; difd = mkDifD64;
3233         and_or_ty = Ity_I64; improve = mkImproveAND64; goto do_And_Or;
3234      case Iop_And32:
3235         uifu = mkUifU32; difd = mkDifD32;
3236         and_or_ty = Ity_I32; improve = mkImproveAND32; goto do_And_Or;
3237      case Iop_And16:
3238         uifu = mkUifU16; difd = mkDifD16;
3239         and_or_ty = Ity_I16; improve = mkImproveAND16; goto do_And_Or;
3240      case Iop_And8:
3241         uifu = mkUifU8; difd = mkDifD8;
3242         and_or_ty = Ity_I8; improve = mkImproveAND8; goto do_And_Or;
3243
3244      case Iop_OrV128:
3245         uifu = mkUifUV128; difd = mkDifDV128;
3246         and_or_ty = Ity_V128; improve = mkImproveORV128; goto do_And_Or;
3247      case Iop_Or64:
3248         uifu = mkUifU64; difd = mkDifD64;
3249         and_or_ty = Ity_I64; improve = mkImproveOR64; goto do_And_Or;
3250      case Iop_Or32:
3251         uifu = mkUifU32; difd = mkDifD32;
3252         and_or_ty = Ity_I32; improve = mkImproveOR32; goto do_And_Or;
3253      case Iop_Or16:
3254         uifu = mkUifU16; difd = mkDifD16;
3255         and_or_ty = Ity_I16; improve = mkImproveOR16; goto do_And_Or;
3256      case Iop_Or8:
3257         uifu = mkUifU8; difd = mkDifD8;
3258         and_or_ty = Ity_I8; improve = mkImproveOR8; goto do_And_Or;
3259
3260      do_And_Or:
3261         return
3262         assignNew(
3263            'V', mce,
3264            and_or_ty,
3265            difd(mce, uifu(mce, vatom1, vatom2),
3266                      difd(mce, improve(mce, atom1, vatom1),
3267                                improve(mce, atom2, vatom2) ) ) );
3268
3269      case Iop_Xor8:
3270         return mkUifU8(mce, vatom1, vatom2);
3271      case Iop_Xor16:
3272         return mkUifU16(mce, vatom1, vatom2);
3273      case Iop_Xor32:
3274         return mkUifU32(mce, vatom1, vatom2);
3275      case Iop_Xor64:
3276         return mkUifU64(mce, vatom1, vatom2);
3277      case Iop_XorV128:
3278         return mkUifUV128(mce, vatom1, vatom2);
3279
3280      default:
3281         ppIROp(op);
3282         VG_(tool_panic)("memcheck:expr2vbits_Binop");
3283   }
3284}
3285
3286
3287static
3288IRExpr* expr2vbits_Unop ( MCEnv* mce, IROp op, IRAtom* atom )
3289{
3290   IRAtom* vatom = expr2vbits( mce, atom );
3291   tl_assert(isOriginalAtom(mce,atom));
3292   switch (op) {
3293
3294      case Iop_Sqrt64Fx2:
3295         return unary64Fx2(mce, vatom);
3296
3297      case Iop_Sqrt64F0x2:
3298         return unary64F0x2(mce, vatom);
3299
3300      case Iop_Sqrt32Fx4:
3301      case Iop_RSqrt32Fx4:
3302      case Iop_Recip32Fx4:
3303      case Iop_I32UtoFx4:
3304      case Iop_I32StoFx4:
3305      case Iop_QFtoI32Ux4_RZ:
3306      case Iop_QFtoI32Sx4_RZ:
3307      case Iop_RoundF32x4_RM:
3308      case Iop_RoundF32x4_RP:
3309      case Iop_RoundF32x4_RN:
3310      case Iop_RoundF32x4_RZ:
3311      case Iop_Recip32x4:
3312      case Iop_Abs32Fx4:
3313      case Iop_Neg32Fx4:
3314      case Iop_Rsqrte32Fx4:
3315         return unary32Fx4(mce, vatom);
3316
3317      case Iop_I32UtoFx2:
3318      case Iop_I32StoFx2:
3319      case Iop_Recip32Fx2:
3320      case Iop_Recip32x2:
3321      case Iop_Abs32Fx2:
3322      case Iop_Neg32Fx2:
3323      case Iop_Rsqrte32Fx2:
3324         return unary32Fx2(mce, vatom);
3325
3326      case Iop_Sqrt32F0x4:
3327      case Iop_RSqrt32F0x4:
3328      case Iop_Recip32F0x4:
3329         return unary32F0x4(mce, vatom);
3330
3331      case Iop_32UtoV128:
3332      case Iop_64UtoV128:
3333      case Iop_Dup8x16:
3334      case Iop_Dup16x8:
3335      case Iop_Dup32x4:
3336      case Iop_Reverse16_8x16:
3337      case Iop_Reverse32_8x16:
3338      case Iop_Reverse32_16x8:
3339      case Iop_Reverse64_8x16:
3340      case Iop_Reverse64_16x8:
3341      case Iop_Reverse64_32x4:
3342         return assignNew('V', mce, Ity_V128, unop(op, vatom));
3343
3344      case Iop_F128HItoF64:  /* F128 -> high half of F128 */
3345         return assignNew('V', mce, Ity_I64, unop(Iop_128HIto64, vatom));
3346      case Iop_F128LOtoF64:  /* F128 -> low  half of F128 */
3347         return assignNew('V', mce, Ity_I64, unop(Iop_128to64, vatom));
3348
3349      case Iop_NegF128:
3350      case Iop_AbsF128:
3351         return mkPCastTo(mce, Ity_I128, vatom);
3352
3353      case Iop_I32StoF128: /* signed I32 -> F128 */
3354      case Iop_I64StoF128: /* signed I64 -> F128 */
3355      case Iop_F32toF128:  /* F32 -> F128 */
3356      case Iop_F64toF128:  /* F64 -> F128 */
3357         return mkPCastTo(mce, Ity_I128, vatom);
3358
3359      case Iop_F32toF64:
3360      case Iop_I32StoF64:
3361      case Iop_I32UtoF64:
3362      case Iop_NegF64:
3363      case Iop_AbsF64:
3364      case Iop_Est5FRSqrt:
3365      case Iop_RoundF64toF64_NEAREST:
3366      case Iop_RoundF64toF64_NegINF:
3367      case Iop_RoundF64toF64_PosINF:
3368      case Iop_RoundF64toF64_ZERO:
3369      case Iop_Clz64:
3370      case Iop_Ctz64:
3371         return mkPCastTo(mce, Ity_I64, vatom);
3372
3373      case Iop_Clz32:
3374      case Iop_Ctz32:
3375      case Iop_TruncF64asF32:
3376      case Iop_NegF32:
3377      case Iop_AbsF32:
3378         return mkPCastTo(mce, Ity_I32, vatom);
3379
3380      case Iop_1Uto64:
3381      case Iop_1Sto64:
3382      case Iop_8Uto64:
3383      case Iop_8Sto64:
3384      case Iop_16Uto64:
3385      case Iop_16Sto64:
3386      case Iop_32Sto64:
3387      case Iop_32Uto64:
3388      case Iop_V128to64:
3389      case Iop_V128HIto64:
3390      case Iop_128HIto64:
3391      case Iop_128to64:
3392      case Iop_Dup8x8:
3393      case Iop_Dup16x4:
3394      case Iop_Dup32x2:
3395      case Iop_Reverse16_8x8:
3396      case Iop_Reverse32_8x8:
3397      case Iop_Reverse32_16x4:
3398      case Iop_Reverse64_8x8:
3399      case Iop_Reverse64_16x4:
3400      case Iop_Reverse64_32x2:
3401         return assignNew('V', mce, Ity_I64, unop(op, vatom));
3402
3403      case Iop_I16StoF32:
3404      case Iop_64to32:
3405      case Iop_64HIto32:
3406      case Iop_1Uto32:
3407      case Iop_1Sto32:
3408      case Iop_8Uto32:
3409      case Iop_16Uto32:
3410      case Iop_16Sto32:
3411      case Iop_8Sto32:
3412      case Iop_V128to32:
3413         return assignNew('V', mce, Ity_I32, unop(op, vatom));
3414
3415      case Iop_8Sto16:
3416      case Iop_8Uto16:
3417      case Iop_32to16:
3418      case Iop_32HIto16:
3419      case Iop_64to16:
3420         return assignNew('V', mce, Ity_I16, unop(op, vatom));
3421
3422      case Iop_1Uto8:
3423      case Iop_1Sto8:
3424      case Iop_16to8:
3425      case Iop_16HIto8:
3426      case Iop_32to8:
3427      case Iop_64to8:
3428         return assignNew('V', mce, Ity_I8, unop(op, vatom));
3429
3430      case Iop_32to1:
3431         return assignNew('V', mce, Ity_I1, unop(Iop_32to1, vatom));
3432
3433      case Iop_64to1:
3434         return assignNew('V', mce, Ity_I1, unop(Iop_64to1, vatom));
3435
3436      case Iop_ReinterpF64asI64:
3437      case Iop_ReinterpI64asF64:
3438      case Iop_ReinterpI32asF32:
3439      case Iop_ReinterpF32asI32:
3440      case Iop_NotV128:
3441      case Iop_Not64:
3442      case Iop_Not32:
3443      case Iop_Not16:
3444      case Iop_Not8:
3445      case Iop_Not1:
3446         return vatom;
3447
3448      case Iop_CmpNEZ8x8:
3449      case Iop_Cnt8x8:
3450      case Iop_Clz8Sx8:
3451      case Iop_Cls8Sx8:
3452      case Iop_Abs8x8:
3453         return mkPCast8x8(mce, vatom);
3454
3455      case Iop_CmpNEZ8x16:
3456      case Iop_Cnt8x16:
3457      case Iop_Clz8Sx16:
3458      case Iop_Cls8Sx16:
3459      case Iop_Abs8x16:
3460         return mkPCast8x16(mce, vatom);
3461
3462      case Iop_CmpNEZ16x4:
3463      case Iop_Clz16Sx4:
3464      case Iop_Cls16Sx4:
3465      case Iop_Abs16x4:
3466         return mkPCast16x4(mce, vatom);
3467
3468      case Iop_CmpNEZ16x8:
3469      case Iop_Clz16Sx8:
3470      case Iop_Cls16Sx8:
3471      case Iop_Abs16x8:
3472         return mkPCast16x8(mce, vatom);
3473
3474      case Iop_CmpNEZ32x2:
3475      case Iop_Clz32Sx2:
3476      case Iop_Cls32Sx2:
3477      case Iop_FtoI32Ux2_RZ:
3478      case Iop_FtoI32Sx2_RZ:
3479      case Iop_Abs32x2:
3480         return mkPCast32x2(mce, vatom);
3481
3482      case Iop_CmpNEZ32x4:
3483      case Iop_Clz32Sx4:
3484      case Iop_Cls32Sx4:
3485      case Iop_FtoI32Ux4_RZ:
3486      case Iop_FtoI32Sx4_RZ:
3487      case Iop_Abs32x4:
3488         return mkPCast32x4(mce, vatom);
3489
3490      case Iop_CmpwNEZ64:
3491         return mkPCastTo(mce, Ity_I64, vatom);
3492
3493      case Iop_CmpNEZ64x2:
3494         return mkPCast64x2(mce, vatom);
3495
3496      case Iop_NarrowUn16to8x8:
3497      case Iop_NarrowUn32to16x4:
3498      case Iop_NarrowUn64to32x2:
3499      case Iop_QNarrowUn16Sto8Sx8:
3500      case Iop_QNarrowUn16Sto8Ux8:
3501      case Iop_QNarrowUn16Uto8Ux8:
3502      case Iop_QNarrowUn32Sto16Sx4:
3503      case Iop_QNarrowUn32Sto16Ux4:
3504      case Iop_QNarrowUn32Uto16Ux4:
3505      case Iop_QNarrowUn64Sto32Sx2:
3506      case Iop_QNarrowUn64Sto32Ux2:
3507      case Iop_QNarrowUn64Uto32Ux2:
3508         return vectorNarrowUnV128(mce, op, vatom);
3509
3510      case Iop_Widen8Sto16x8:
3511      case Iop_Widen8Uto16x8:
3512      case Iop_Widen16Sto32x4:
3513      case Iop_Widen16Uto32x4:
3514      case Iop_Widen32Sto64x2:
3515      case Iop_Widen32Uto64x2:
3516         return vectorWidenI64(mce, op, vatom);
3517
3518      case Iop_PwAddL32Ux2:
3519      case Iop_PwAddL32Sx2:
3520         return mkPCastTo(mce, Ity_I64,
3521               assignNew('V', mce, Ity_I64, unop(op, mkPCast32x2(mce, vatom))));
3522
3523      case Iop_PwAddL16Ux4:
3524      case Iop_PwAddL16Sx4:
3525         return mkPCast32x2(mce,
3526               assignNew('V', mce, Ity_I64, unop(op, mkPCast16x4(mce, vatom))));
3527
3528      case Iop_PwAddL8Ux8:
3529      case Iop_PwAddL8Sx8:
3530         return mkPCast16x4(mce,
3531               assignNew('V', mce, Ity_I64, unop(op, mkPCast8x8(mce, vatom))));
3532
3533      case Iop_PwAddL32Ux4:
3534      case Iop_PwAddL32Sx4:
3535         return mkPCast64x2(mce,
3536               assignNew('V', mce, Ity_V128, unop(op, mkPCast32x4(mce, vatom))));
3537
3538      case Iop_PwAddL16Ux8:
3539      case Iop_PwAddL16Sx8:
3540         return mkPCast32x4(mce,
3541               assignNew('V', mce, Ity_V128, unop(op, mkPCast16x8(mce, vatom))));
3542
3543      case Iop_PwAddL8Ux16:
3544      case Iop_PwAddL8Sx16:
3545         return mkPCast16x8(mce,
3546               assignNew('V', mce, Ity_V128, unop(op, mkPCast8x16(mce, vatom))));
3547
3548      case Iop_I64UtoF32:
3549      default:
3550         ppIROp(op);
3551         VG_(tool_panic)("memcheck:expr2vbits_Unop");
3552   }
3553}
3554
3555
3556/* Worker function; do not call directly. */
3557static
3558IRAtom* expr2vbits_Load_WRK ( MCEnv* mce,
3559                              IREndness end, IRType ty,
3560                              IRAtom* addr, UInt bias )
3561{
3562   void*    helper;
3563   Char*    hname;
3564   IRDirty* di;
3565   IRTemp   datavbits;
3566   IRAtom*  addrAct;
3567
3568   tl_assert(isOriginalAtom(mce,addr));
3569   tl_assert(end == Iend_LE || end == Iend_BE);
3570
3571   /* First, emit a definedness test for the address.  This also sets
3572      the address (shadow) to 'defined' following the test. */
3573   complainIfUndefined( mce, addr );
3574
3575   /* Now cook up a call to the relevant helper function, to read the
3576      data V bits from shadow memory. */
3577   ty = shadowTypeV(ty);
3578
3579   if (end == Iend_LE) {
3580      switch (ty) {
3581         case Ity_I64: helper = &MC_(helperc_LOADV64le);
3582                       hname = "MC_(helperc_LOADV64le)";
3583                       break;
3584         case Ity_I32: helper = &MC_(helperc_LOADV32le);
3585                       hname = "MC_(helperc_LOADV32le)";
3586                       break;
3587         case Ity_I16: helper = &MC_(helperc_LOADV16le);
3588                       hname = "MC_(helperc_LOADV16le)";
3589                       break;
3590         case Ity_I8:  helper = &MC_(helperc_LOADV8);
3591                       hname = "MC_(helperc_LOADV8)";
3592                       break;
3593         default:      ppIRType(ty);
3594                       VG_(tool_panic)("memcheck:do_shadow_Load(LE)");
3595      }
3596   } else {
3597      switch (ty) {
3598         case Ity_I64: helper = &MC_(helperc_LOADV64be);
3599                       hname = "MC_(helperc_LOADV64be)";
3600                       break;
3601         case Ity_I32: helper = &MC_(helperc_LOADV32be);
3602                       hname = "MC_(helperc_LOADV32be)";
3603                       break;
3604         case Ity_I16: helper = &MC_(helperc_LOADV16be);
3605                       hname = "MC_(helperc_LOADV16be)";
3606                       break;
3607         case Ity_I8:  helper = &MC_(helperc_LOADV8);
3608                       hname = "MC_(helperc_LOADV8)";
3609                       break;
3610         default:      ppIRType(ty);
3611                       VG_(tool_panic)("memcheck:do_shadow_Load(BE)");
3612      }
3613   }
3614
3615   /* Generate the actual address into addrAct. */
3616   if (bias == 0) {
3617      addrAct = addr;
3618   } else {
3619      IROp    mkAdd;
3620      IRAtom* eBias;
3621      IRType  tyAddr  = mce->hWordTy;
3622      tl_assert( tyAddr == Ity_I32 || tyAddr == Ity_I64 );
3623      mkAdd   = tyAddr==Ity_I32 ? Iop_Add32 : Iop_Add64;
3624      eBias   = tyAddr==Ity_I32 ? mkU32(bias) : mkU64(bias);
3625      addrAct = assignNew('V', mce, tyAddr, binop(mkAdd, addr, eBias) );
3626   }
3627
3628   /* We need to have a place to park the V bits we're just about to
3629      read. */
3630   datavbits = newTemp(mce, ty, VSh);
3631   di = unsafeIRDirty_1_N( datavbits,
3632                           1/*regparms*/,
3633                           hname, VG_(fnptr_to_fnentry)( helper ),
3634                           mkIRExprVec_1( addrAct ));
3635   setHelperAnns( mce, di );
3636   stmt( 'V', mce, IRStmt_Dirty(di) );
3637
3638   return mkexpr(datavbits);
3639}
3640
3641
3642static
3643IRAtom* expr2vbits_Load ( MCEnv* mce,
3644                          IREndness end, IRType ty,
3645                          IRAtom* addr, UInt bias )
3646{
3647   IRAtom *v64hi, *v64lo;
3648   tl_assert(end == Iend_LE || end == Iend_BE);
3649   switch (shadowTypeV(ty)) {
3650      case Ity_I8:
3651      case Ity_I16:
3652      case Ity_I32:
3653      case Ity_I64:
3654         return expr2vbits_Load_WRK(mce, end, ty, addr, bias);
3655      case Ity_V128:
3656         if (end == Iend_LE) {
3657            v64lo = expr2vbits_Load_WRK(mce, end, Ity_I64, addr, bias);
3658            v64hi = expr2vbits_Load_WRK(mce, end, Ity_I64, addr, bias+8);
3659         } else {
3660            v64hi = expr2vbits_Load_WRK(mce, end, Ity_I64, addr, bias);
3661            v64lo = expr2vbits_Load_WRK(mce, end, Ity_I64, addr, bias+8);
3662         }
3663         return assignNew( 'V', mce,
3664                           Ity_V128,
3665                           binop(Iop_64HLtoV128, v64hi, v64lo));
3666      default:
3667         VG_(tool_panic)("expr2vbits_Load");
3668   }
3669}
3670
3671
3672static
3673IRAtom* expr2vbits_Mux0X ( MCEnv* mce,
3674                           IRAtom* cond, IRAtom* expr0, IRAtom* exprX )
3675{
3676   IRAtom *vbitsC, *vbits0, *vbitsX;
3677   IRType ty;
3678   /* Given Mux0X(cond,expr0,exprX), generate
3679         Mux0X(cond,expr0#,exprX#) `UifU` PCast(cond#)
3680      That is, steer the V bits like the originals, but trash the
3681      result if the steering value is undefined.  This gives
3682      lazy propagation. */
3683   tl_assert(isOriginalAtom(mce, cond));
3684   tl_assert(isOriginalAtom(mce, expr0));
3685   tl_assert(isOriginalAtom(mce, exprX));
3686
3687   vbitsC = expr2vbits(mce, cond);
3688   vbits0 = expr2vbits(mce, expr0);
3689   vbitsX = expr2vbits(mce, exprX);
3690   ty = typeOfIRExpr(mce->sb->tyenv, vbits0);
3691
3692   return
3693      mkUifU(mce, ty, assignNew('V', mce, ty,
3694                                     IRExpr_Mux0X(cond, vbits0, vbitsX)),
3695                      mkPCastTo(mce, ty, vbitsC) );
3696}
3697
3698/* --------- This is the main expression-handling function. --------- */
3699
3700static
3701IRExpr* expr2vbits ( MCEnv* mce, IRExpr* e )
3702{
3703   switch (e->tag) {
3704
3705      case Iex_Get:
3706         return shadow_GET( mce, e->Iex.Get.offset, e->Iex.Get.ty );
3707
3708      case Iex_GetI:
3709         return shadow_GETI( mce, e->Iex.GetI.descr,
3710                                  e->Iex.GetI.ix, e->Iex.GetI.bias );
3711
3712      case Iex_RdTmp:
3713         return IRExpr_RdTmp( findShadowTmpV(mce, e->Iex.RdTmp.tmp) );
3714
3715      case Iex_Const:
3716         return definedOfType(shadowTypeV(typeOfIRExpr(mce->sb->tyenv, e)));
3717
3718      case Iex_Qop:
3719         return expr2vbits_Qop(
3720                   mce,
3721                   e->Iex.Qop.op,
3722                   e->Iex.Qop.arg1, e->Iex.Qop.arg2,
3723		   e->Iex.Qop.arg3, e->Iex.Qop.arg4
3724                );
3725
3726      case Iex_Triop:
3727         return expr2vbits_Triop(
3728                   mce,
3729                   e->Iex.Triop.op,
3730                   e->Iex.Triop.arg1, e->Iex.Triop.arg2, e->Iex.Triop.arg3
3731                );
3732
3733      case Iex_Binop:
3734         return expr2vbits_Binop(
3735                   mce,
3736                   e->Iex.Binop.op,
3737                   e->Iex.Binop.arg1, e->Iex.Binop.arg2
3738                );
3739
3740      case Iex_Unop:
3741         return expr2vbits_Unop( mce, e->Iex.Unop.op, e->Iex.Unop.arg );
3742
3743      case Iex_Load:
3744         return expr2vbits_Load( mce, e->Iex.Load.end,
3745                                      e->Iex.Load.ty,
3746                                      e->Iex.Load.addr, 0/*addr bias*/ );
3747
3748      case Iex_CCall:
3749         return mkLazyN( mce, e->Iex.CCall.args,
3750                              e->Iex.CCall.retty,
3751                              e->Iex.CCall.cee );
3752
3753      case Iex_Mux0X:
3754         return expr2vbits_Mux0X( mce, e->Iex.Mux0X.cond, e->Iex.Mux0X.expr0,
3755                                       e->Iex.Mux0X.exprX);
3756
3757      default:
3758         VG_(printf)("\n");
3759         ppIRExpr(e);
3760         VG_(printf)("\n");
3761         VG_(tool_panic)("memcheck: expr2vbits");
3762   }
3763}
3764
3765/*------------------------------------------------------------*/
3766/*--- Generate shadow stmts from all kinds of IRStmts.     ---*/
3767/*------------------------------------------------------------*/
3768
3769/* Widen a value to the host word size. */
3770
3771static
3772IRExpr* zwidenToHostWord ( MCEnv* mce, IRAtom* vatom )
3773{
3774   IRType ty, tyH;
3775
3776   /* vatom is vbits-value and as such can only have a shadow type. */
3777   tl_assert(isShadowAtom(mce,vatom));
3778
3779   ty  = typeOfIRExpr(mce->sb->tyenv, vatom);
3780   tyH = mce->hWordTy;
3781
3782   if (tyH == Ity_I32) {
3783      switch (ty) {
3784         case Ity_I32:
3785            return vatom;
3786         case Ity_I16:
3787            return assignNew('V', mce, tyH, unop(Iop_16Uto32, vatom));
3788         case Ity_I8:
3789            return assignNew('V', mce, tyH, unop(Iop_8Uto32, vatom));
3790         default:
3791            goto unhandled;
3792      }
3793   } else
3794   if (tyH == Ity_I64) {
3795      switch (ty) {
3796         case Ity_I32:
3797            return assignNew('V', mce, tyH, unop(Iop_32Uto64, vatom));
3798         case Ity_I16:
3799            return assignNew('V', mce, tyH, unop(Iop_32Uto64,
3800                   assignNew('V', mce, Ity_I32, unop(Iop_16Uto32, vatom))));
3801         case Ity_I8:
3802            return assignNew('V', mce, tyH, unop(Iop_32Uto64,
3803                   assignNew('V', mce, Ity_I32, unop(Iop_8Uto32, vatom))));
3804         default:
3805            goto unhandled;
3806      }
3807   } else {
3808      goto unhandled;
3809   }
3810  unhandled:
3811   VG_(printf)("\nty = "); ppIRType(ty); VG_(printf)("\n");
3812   VG_(tool_panic)("zwidenToHostWord");
3813}
3814
3815
3816/* Generate a shadow store.  addr is always the original address atom.
3817   You can pass in either originals or V-bits for the data atom, but
3818   obviously not both.  guard :: Ity_I1 controls whether the store
3819   really happens; NULL means it unconditionally does.  Note that
3820   guard itself is not checked for definedness; the caller of this
3821   function must do that if necessary. */
3822
3823static
3824void do_shadow_Store ( MCEnv* mce,
3825                       IREndness end,
3826                       IRAtom* addr, UInt bias,
3827                       IRAtom* data, IRAtom* vdata,
3828                       IRAtom* guard )
3829{
3830   IROp     mkAdd;
3831   IRType   ty, tyAddr;
3832   void*    helper = NULL;
3833   Char*    hname = NULL;
3834   IRConst* c;
3835
3836   tyAddr = mce->hWordTy;
3837   mkAdd  = tyAddr==Ity_I32 ? Iop_Add32 : Iop_Add64;
3838   tl_assert( tyAddr == Ity_I32 || tyAddr == Ity_I64 );
3839   tl_assert( end == Iend_LE || end == Iend_BE );
3840
3841   if (data) {
3842      tl_assert(!vdata);
3843      tl_assert(isOriginalAtom(mce, data));
3844      tl_assert(bias == 0);
3845      vdata = expr2vbits( mce, data );
3846   } else {
3847      tl_assert(vdata);
3848   }
3849
3850   tl_assert(isOriginalAtom(mce,addr));
3851   tl_assert(isShadowAtom(mce,vdata));
3852
3853   if (guard) {
3854      tl_assert(isOriginalAtom(mce, guard));
3855      tl_assert(typeOfIRExpr(mce->sb->tyenv, guard) == Ity_I1);
3856   }
3857
3858   ty = typeOfIRExpr(mce->sb->tyenv, vdata);
3859
3860   // If we're not doing undefined value checking, pretend that this value
3861   // is "all valid".  That lets Vex's optimiser remove some of the V bit
3862   // shadow computation ops that precede it.
3863   if (MC_(clo_mc_level) == 1) {
3864      switch (ty) {
3865         case Ity_V128: // V128 weirdness
3866                        c = IRConst_V128(V_BITS16_DEFINED); break;
3867         case Ity_I64:  c = IRConst_U64 (V_BITS64_DEFINED); break;
3868         case Ity_I32:  c = IRConst_U32 (V_BITS32_DEFINED); break;
3869         case Ity_I16:  c = IRConst_U16 (V_BITS16_DEFINED); break;
3870         case Ity_I8:   c = IRConst_U8  (V_BITS8_DEFINED);  break;
3871         default:       VG_(tool_panic)("memcheck:do_shadow_Store(LE)");
3872      }
3873      vdata = IRExpr_Const( c );
3874   }
3875
3876   /* First, emit a definedness test for the address.  This also sets
3877      the address (shadow) to 'defined' following the test. */
3878   complainIfUndefined( mce, addr );
3879
3880   /* Now decide which helper function to call to write the data V
3881      bits into shadow memory. */
3882   if (end == Iend_LE) {
3883      switch (ty) {
3884         case Ity_V128: /* we'll use the helper twice */
3885         case Ity_I64: helper = &MC_(helperc_STOREV64le);
3886                       hname = "MC_(helperc_STOREV64le)";
3887                       break;
3888         case Ity_I32: helper = &MC_(helperc_STOREV32le);
3889                       hname = "MC_(helperc_STOREV32le)";
3890                       break;
3891         case Ity_I16: helper = &MC_(helperc_STOREV16le);
3892                       hname = "MC_(helperc_STOREV16le)";
3893                       break;
3894         case Ity_I8:  helper = &MC_(helperc_STOREV8);
3895                       hname = "MC_(helperc_STOREV8)";
3896                       break;
3897         default:      VG_(tool_panic)("memcheck:do_shadow_Store(LE)");
3898      }
3899   } else {
3900      switch (ty) {
3901         case Ity_V128: /* we'll use the helper twice */
3902         case Ity_I64: helper = &MC_(helperc_STOREV64be);
3903                       hname = "MC_(helperc_STOREV64be)";
3904                       break;
3905         case Ity_I32: helper = &MC_(helperc_STOREV32be);
3906                       hname = "MC_(helperc_STOREV32be)";
3907                       break;
3908         case Ity_I16: helper = &MC_(helperc_STOREV16be);
3909                       hname = "MC_(helperc_STOREV16be)";
3910                       break;
3911         case Ity_I8:  helper = &MC_(helperc_STOREV8);
3912                       hname = "MC_(helperc_STOREV8)";
3913                       break;
3914         default:      VG_(tool_panic)("memcheck:do_shadow_Store(BE)");
3915      }
3916   }
3917
3918   if (ty == Ity_V128) {
3919
3920      /* V128-bit case */
3921      /* See comment in next clause re 64-bit regparms */
3922      /* also, need to be careful about endianness */
3923
3924      Int     offLo64, offHi64;
3925      IRDirty *diLo64, *diHi64;
3926      IRAtom  *addrLo64, *addrHi64;
3927      IRAtom  *vdataLo64, *vdataHi64;
3928      IRAtom  *eBiasLo64, *eBiasHi64;
3929
3930      if (end == Iend_LE) {
3931         offLo64 = 0;
3932         offHi64 = 8;
3933      } else {
3934         offLo64 = 8;
3935         offHi64 = 0;
3936      }
3937
3938      eBiasLo64 = tyAddr==Ity_I32 ? mkU32(bias+offLo64) : mkU64(bias+offLo64);
3939      addrLo64  = assignNew('V', mce, tyAddr, binop(mkAdd, addr, eBiasLo64) );
3940      vdataLo64 = assignNew('V', mce, Ity_I64, unop(Iop_V128to64, vdata));
3941      diLo64    = unsafeIRDirty_0_N(
3942                     1/*regparms*/,
3943                     hname, VG_(fnptr_to_fnentry)( helper ),
3944                     mkIRExprVec_2( addrLo64, vdataLo64 )
3945                  );
3946      eBiasHi64 = tyAddr==Ity_I32 ? mkU32(bias+offHi64) : mkU64(bias+offHi64);
3947      addrHi64  = assignNew('V', mce, tyAddr, binop(mkAdd, addr, eBiasHi64) );
3948      vdataHi64 = assignNew('V', mce, Ity_I64, unop(Iop_V128HIto64, vdata));
3949      diHi64    = unsafeIRDirty_0_N(
3950                     1/*regparms*/,
3951                     hname, VG_(fnptr_to_fnentry)( helper ),
3952                     mkIRExprVec_2( addrHi64, vdataHi64 )
3953                  );
3954      if (guard) diLo64->guard = guard;
3955      if (guard) diHi64->guard = guard;
3956      setHelperAnns( mce, diLo64 );
3957      setHelperAnns( mce, diHi64 );
3958      stmt( 'V', mce, IRStmt_Dirty(diLo64) );
3959      stmt( 'V', mce, IRStmt_Dirty(diHi64) );
3960
3961   } else {
3962
3963      IRDirty *di;
3964      IRAtom  *addrAct;
3965
3966      /* 8/16/32/64-bit cases */
3967      /* Generate the actual address into addrAct. */
3968      if (bias == 0) {
3969         addrAct = addr;
3970      } else {
3971         IRAtom* eBias   = tyAddr==Ity_I32 ? mkU32(bias) : mkU64(bias);
3972         addrAct = assignNew('V', mce, tyAddr, binop(mkAdd, addr, eBias));
3973      }
3974
3975      if (ty == Ity_I64) {
3976         /* We can't do this with regparm 2 on 32-bit platforms, since
3977            the back ends aren't clever enough to handle 64-bit
3978            regparm args.  Therefore be different. */
3979         di = unsafeIRDirty_0_N(
3980                 1/*regparms*/,
3981                 hname, VG_(fnptr_to_fnentry)( helper ),
3982                 mkIRExprVec_2( addrAct, vdata )
3983              );
3984      } else {
3985         di = unsafeIRDirty_0_N(
3986                 2/*regparms*/,
3987                 hname, VG_(fnptr_to_fnentry)( helper ),
3988                 mkIRExprVec_2( addrAct,
3989                                zwidenToHostWord( mce, vdata ))
3990              );
3991      }
3992      if (guard) di->guard = guard;
3993      setHelperAnns( mce, di );
3994      stmt( 'V', mce, IRStmt_Dirty(di) );
3995   }
3996
3997}
3998
3999
4000/* Do lazy pessimistic propagation through a dirty helper call, by
4001   looking at the annotations on it.  This is the most complex part of
4002   Memcheck. */
4003
4004static IRType szToITy ( Int n )
4005{
4006   switch (n) {
4007      case 1: return Ity_I8;
4008      case 2: return Ity_I16;
4009      case 4: return Ity_I32;
4010      case 8: return Ity_I64;
4011      default: VG_(tool_panic)("szToITy(memcheck)");
4012   }
4013}
4014
4015static
4016void do_shadow_Dirty ( MCEnv* mce, IRDirty* d )
4017{
4018   Int       i, n, toDo, gSz, gOff;
4019   IRAtom    *src, *here, *curr;
4020   IRType    tySrc, tyDst;
4021   IRTemp    dst;
4022   IREndness end;
4023
4024   /* What's the native endianness?  We need to know this. */
4025#  if defined(VG_BIGENDIAN)
4026   end = Iend_BE;
4027#  elif defined(VG_LITTLEENDIAN)
4028   end = Iend_LE;
4029#  else
4030#    error "Unknown endianness"
4031#  endif
4032
4033   /* First check the guard. */
4034   complainIfUndefined(mce, d->guard);
4035
4036   /* Now round up all inputs and PCast over them. */
4037   curr = definedOfType(Ity_I32);
4038
4039   /* Inputs: unmasked args */
4040   for (i = 0; d->args[i]; i++) {
4041      if (d->cee->mcx_mask & (1<<i)) {
4042         /* ignore this arg */
4043      } else {
4044         here = mkPCastTo( mce, Ity_I32, expr2vbits(mce, d->args[i]) );
4045         curr = mkUifU32(mce, here, curr);
4046      }
4047   }
4048
4049   /* Inputs: guest state that we read. */
4050   for (i = 0; i < d->nFxState; i++) {
4051      tl_assert(d->fxState[i].fx != Ifx_None);
4052      if (d->fxState[i].fx == Ifx_Write)
4053         continue;
4054
4055      /* Ignore any sections marked as 'always defined'. */
4056      if (isAlwaysDefd(mce, d->fxState[i].offset, d->fxState[i].size )) {
4057         if (0)
4058         VG_(printf)("memcheck: Dirty gst: ignored off %d, sz %d\n",
4059                     d->fxState[i].offset, d->fxState[i].size );
4060         continue;
4061      }
4062
4063      /* This state element is read or modified.  So we need to
4064         consider it.  If larger than 8 bytes, deal with it in 8-byte
4065         chunks. */
4066      gSz  = d->fxState[i].size;
4067      gOff = d->fxState[i].offset;
4068      tl_assert(gSz > 0);
4069      while (True) {
4070         if (gSz == 0) break;
4071         n = gSz <= 8 ? gSz : 8;
4072         /* update 'curr' with UifU of the state slice
4073            gOff .. gOff+n-1 */
4074         tySrc = szToITy( n );
4075         src   = assignNew( 'V', mce, tySrc,
4076                                 shadow_GET(mce, gOff, tySrc ) );
4077         here = mkPCastTo( mce, Ity_I32, src );
4078         curr = mkUifU32(mce, here, curr);
4079         gSz -= n;
4080         gOff += n;
4081      }
4082
4083   }
4084
4085   /* Inputs: memory.  First set up some info needed regardless of
4086      whether we're doing reads or writes. */
4087
4088   if (d->mFx != Ifx_None) {
4089      /* Because we may do multiple shadow loads/stores from the same
4090         base address, it's best to do a single test of its
4091         definedness right now.  Post-instrumentation optimisation
4092         should remove all but this test. */
4093      IRType tyAddr;
4094      tl_assert(d->mAddr);
4095      complainIfUndefined(mce, d->mAddr);
4096
4097      tyAddr = typeOfIRExpr(mce->sb->tyenv, d->mAddr);
4098      tl_assert(tyAddr == Ity_I32 || tyAddr == Ity_I64);
4099      tl_assert(tyAddr == mce->hWordTy); /* not really right */
4100   }
4101
4102   /* Deal with memory inputs (reads or modifies) */
4103   if (d->mFx == Ifx_Read || d->mFx == Ifx_Modify) {
4104      toDo   = d->mSize;
4105      /* chew off 32-bit chunks.  We don't care about the endianness
4106         since it's all going to be condensed down to a single bit,
4107         but nevertheless choose an endianness which is hopefully
4108         native to the platform. */
4109      while (toDo >= 4) {
4110         here = mkPCastTo(
4111                   mce, Ity_I32,
4112                   expr2vbits_Load ( mce, end, Ity_I32,
4113                                     d->mAddr, d->mSize - toDo )
4114                );
4115         curr = mkUifU32(mce, here, curr);
4116         toDo -= 4;
4117      }
4118      /* chew off 16-bit chunks */
4119      while (toDo >= 2) {
4120         here = mkPCastTo(
4121                   mce, Ity_I32,
4122                   expr2vbits_Load ( mce, end, Ity_I16,
4123                                     d->mAddr, d->mSize - toDo )
4124                );
4125         curr = mkUifU32(mce, here, curr);
4126         toDo -= 2;
4127      }
4128      tl_assert(toDo == 0); /* also need to handle 1-byte excess */
4129   }
4130
4131   /* Whew!  So curr is a 32-bit V-value summarising pessimistically
4132      all the inputs to the helper.  Now we need to re-distribute the
4133      results to all destinations. */
4134
4135   /* Outputs: the destination temporary, if there is one. */
4136   if (d->tmp != IRTemp_INVALID) {
4137      dst   = findShadowTmpV(mce, d->tmp);
4138      tyDst = typeOfIRTemp(mce->sb->tyenv, d->tmp);
4139      assign( 'V', mce, dst, mkPCastTo( mce, tyDst, curr) );
4140   }
4141
4142   /* Outputs: guest state that we write or modify. */
4143   for (i = 0; i < d->nFxState; i++) {
4144      tl_assert(d->fxState[i].fx != Ifx_None);
4145      if (d->fxState[i].fx == Ifx_Read)
4146         continue;
4147      /* Ignore any sections marked as 'always defined'. */
4148      if (isAlwaysDefd(mce, d->fxState[i].offset, d->fxState[i].size ))
4149         continue;
4150      /* This state element is written or modified.  So we need to
4151         consider it.  If larger than 8 bytes, deal with it in 8-byte
4152         chunks. */
4153      gSz  = d->fxState[i].size;
4154      gOff = d->fxState[i].offset;
4155      tl_assert(gSz > 0);
4156      while (True) {
4157         if (gSz == 0) break;
4158         n = gSz <= 8 ? gSz : 8;
4159         /* Write suitably-casted 'curr' to the state slice
4160            gOff .. gOff+n-1 */
4161         tyDst = szToITy( n );
4162         do_shadow_PUT( mce, gOff,
4163                             NULL, /* original atom */
4164                             mkPCastTo( mce, tyDst, curr ) );
4165         gSz -= n;
4166         gOff += n;
4167      }
4168   }
4169
4170   /* Outputs: memory that we write or modify.  Same comments about
4171      endianness as above apply. */
4172   if (d->mFx == Ifx_Write || d->mFx == Ifx_Modify) {
4173      toDo   = d->mSize;
4174      /* chew off 32-bit chunks */
4175      while (toDo >= 4) {
4176         do_shadow_Store( mce, end, d->mAddr, d->mSize - toDo,
4177                          NULL, /* original data */
4178                          mkPCastTo( mce, Ity_I32, curr ),
4179                          NULL/*guard*/ );
4180         toDo -= 4;
4181      }
4182      /* chew off 16-bit chunks */
4183      while (toDo >= 2) {
4184         do_shadow_Store( mce, end, d->mAddr, d->mSize - toDo,
4185                          NULL, /* original data */
4186                          mkPCastTo( mce, Ity_I16, curr ),
4187                          NULL/*guard*/ );
4188         toDo -= 2;
4189      }
4190      tl_assert(toDo == 0); /* also need to handle 1-byte excess */
4191   }
4192
4193}
4194
4195
4196/* We have an ABI hint telling us that [base .. base+len-1] is to
4197   become undefined ("writable").  Generate code to call a helper to
4198   notify the A/V bit machinery of this fact.
4199
4200   We call
4201   void MC_(helperc_MAKE_STACK_UNINIT) ( Addr base, UWord len,
4202                                                    Addr nia );
4203*/
4204static
4205void do_AbiHint ( MCEnv* mce, IRExpr* base, Int len, IRExpr* nia )
4206{
4207   IRDirty* di;
4208   /* Minor optimisation: if not doing origin tracking, ignore the
4209      supplied nia and pass zero instead.  This is on the basis that
4210      MC_(helperc_MAKE_STACK_UNINIT) will ignore it anyway, and we can
4211      almost always generate a shorter instruction to put zero into a
4212      register than any other value. */
4213   if (MC_(clo_mc_level) < 3)
4214      nia = mkIRExpr_HWord(0);
4215
4216   di = unsafeIRDirty_0_N(
4217           0/*regparms*/,
4218           "MC_(helperc_MAKE_STACK_UNINIT)",
4219           VG_(fnptr_to_fnentry)( &MC_(helperc_MAKE_STACK_UNINIT) ),
4220           mkIRExprVec_3( base, mkIRExpr_HWord( (UInt)len), nia )
4221        );
4222   stmt( 'V', mce, IRStmt_Dirty(di) );
4223}
4224
4225
4226/* ------ Dealing with IRCAS (big and complex) ------ */
4227
4228/* FWDS */
4229static IRAtom* gen_load_b  ( MCEnv* mce, Int szB,
4230                             IRAtom* baseaddr, Int offset );
4231static IRAtom* gen_maxU32  ( MCEnv* mce, IRAtom* b1, IRAtom* b2 );
4232static void    gen_store_b ( MCEnv* mce, Int szB,
4233                             IRAtom* baseaddr, Int offset, IRAtom* dataB,
4234                             IRAtom* guard );
4235
4236static void do_shadow_CAS_single ( MCEnv* mce, IRCAS* cas );
4237static void do_shadow_CAS_double ( MCEnv* mce, IRCAS* cas );
4238
4239
4240/* Either ORIG and SHADOW are both IRExpr.RdTmps, or they are both
4241   IRExpr.Consts, else this asserts.  If they are both Consts, it
4242   doesn't do anything.  So that just leaves the RdTmp case.
4243
4244   In which case: this assigns the shadow value SHADOW to the IR
4245   shadow temporary associated with ORIG.  That is, ORIG, being an
4246   original temporary, will have a shadow temporary associated with
4247   it.  However, in the case envisaged here, there will so far have
4248   been no IR emitted to actually write a shadow value into that
4249   temporary.  What this routine does is to (emit IR to) copy the
4250   value in SHADOW into said temporary, so that after this call,
4251   IRExpr.RdTmps of ORIG's shadow temp will correctly pick up the
4252   value in SHADOW.
4253
4254   Point is to allow callers to compute "by hand" a shadow value for
4255   ORIG, and force it to be associated with ORIG.
4256
4257   How do we know that that shadow associated with ORIG has not so far
4258   been assigned to?  Well, we don't per se know that, but supposing
4259   it had.  Then this routine would create a second assignment to it,
4260   and later the IR sanity checker would barf.  But that never
4261   happens.  QED.
4262*/
4263static void bind_shadow_tmp_to_orig ( UChar how,
4264                                      MCEnv* mce,
4265                                      IRAtom* orig, IRAtom* shadow )
4266{
4267   tl_assert(isOriginalAtom(mce, orig));
4268   tl_assert(isShadowAtom(mce, shadow));
4269   switch (orig->tag) {
4270      case Iex_Const:
4271         tl_assert(shadow->tag == Iex_Const);
4272         break;
4273      case Iex_RdTmp:
4274         tl_assert(shadow->tag == Iex_RdTmp);
4275         if (how == 'V') {
4276            assign('V', mce, findShadowTmpV(mce,orig->Iex.RdTmp.tmp),
4277                   shadow);
4278         } else {
4279            tl_assert(how == 'B');
4280            assign('B', mce, findShadowTmpB(mce,orig->Iex.RdTmp.tmp),
4281                   shadow);
4282         }
4283         break;
4284      default:
4285         tl_assert(0);
4286   }
4287}
4288
4289
4290static
4291void do_shadow_CAS ( MCEnv* mce, IRCAS* cas )
4292{
4293   /* Scheme is (both single- and double- cases):
4294
4295      1. fetch data#,dataB (the proposed new value)
4296
4297      2. fetch expd#,expdB (what we expect to see at the address)
4298
4299      3. check definedness of address
4300
4301      4. load old#,oldB from shadow memory; this also checks
4302         addressibility of the address
4303
4304      5. the CAS itself
4305
4306      6. compute "expected == old".  See COMMENT_ON_CasCmpEQ below.
4307
4308      7. if "expected == old" (as computed by (6))
4309            store data#,dataB to shadow memory
4310
4311      Note that 5 reads 'old' but 4 reads 'old#'.  Similarly, 5 stores
4312      'data' but 7 stores 'data#'.  Hence it is possible for the
4313      shadow data to be incorrectly checked and/or updated:
4314
4315      * 7 is at least gated correctly, since the 'expected == old'
4316        condition is derived from outputs of 5.  However, the shadow
4317        write could happen too late: imagine after 5 we are
4318        descheduled, a different thread runs, writes a different
4319        (shadow) value at the address, and then we resume, hence
4320        overwriting the shadow value written by the other thread.
4321
4322      Because the original memory access is atomic, there's no way to
4323      make both the original and shadow accesses into a single atomic
4324      thing, hence this is unavoidable.
4325
4326      At least as Valgrind stands, I don't think it's a problem, since
4327      we're single threaded *and* we guarantee that there are no
4328      context switches during the execution of any specific superblock
4329      -- context switches can only happen at superblock boundaries.
4330
4331      If Valgrind ever becomes MT in the future, then it might be more
4332      of a problem.  A possible kludge would be to artificially
4333      associate with the location, a lock, which we must acquire and
4334      release around the transaction as a whole.  Hmm, that probably
4335      would't work properly since it only guards us against other
4336      threads doing CASs on the same location, not against other
4337      threads doing normal reads and writes.
4338
4339      ------------------------------------------------------------
4340
4341      COMMENT_ON_CasCmpEQ:
4342
4343      Note two things.  Firstly, in the sequence above, we compute
4344      "expected == old", but we don't check definedness of it.  Why
4345      not?  Also, the x86 and amd64 front ends use
4346      Iop_CmpCas{EQ,NE}{8,16,32,64} comparisons to make the equivalent
4347      determination (expected == old ?) for themselves, and we also
4348      don't check definedness for those primops; we just say that the
4349      result is defined.  Why?  Details follow.
4350
4351      x86/amd64 contains various forms of locked insns:
4352      * lock prefix before all basic arithmetic insn;
4353        eg lock xorl %reg1,(%reg2)
4354      * atomic exchange reg-mem
4355      * compare-and-swaps
4356
4357      Rather than attempt to represent them all, which would be a
4358      royal PITA, I used a result from Maurice Herlihy
4359      (http://en.wikipedia.org/wiki/Maurice_Herlihy), in which he
4360      demonstrates that compare-and-swap is a primitive more general
4361      than the other two, and so can be used to represent all of them.
4362      So the translation scheme for (eg) lock incl (%reg) is as
4363      follows:
4364
4365        again:
4366         old = * %reg
4367         new = old + 1
4368         atomically { if (* %reg == old) { * %reg = new } else { goto again } }
4369
4370      The "atomically" is the CAS bit.  The scheme is always the same:
4371      get old value from memory, compute new value, atomically stuff
4372      new value back in memory iff the old value has not changed (iow,
4373      no other thread modified it in the meantime).  If it has changed
4374      then we've been out-raced and we have to start over.
4375
4376      Now that's all very neat, but it has the bad side effect of
4377      introducing an explicit equality test into the translation.
4378      Consider the behaviour of said code on a memory location which
4379      is uninitialised.  We will wind up doing a comparison on
4380      uninitialised data, and mc duly complains.
4381
4382      What's difficult about this is, the common case is that the
4383      location is uncontended, and so we're usually comparing the same
4384      value (* %reg) with itself.  So we shouldn't complain even if it
4385      is undefined.  But mc doesn't know that.
4386
4387      My solution is to mark the == in the IR specially, so as to tell
4388      mc that it almost certainly compares a value with itself, and we
4389      should just regard the result as always defined.  Rather than
4390      add a bit to all IROps, I just cloned Iop_CmpEQ{8,16,32,64} into
4391      Iop_CasCmpEQ{8,16,32,64} so as not to disturb anything else.
4392
4393      So there's always the question of, can this give a false
4394      negative?  eg, imagine that initially, * %reg is defined; and we
4395      read that; but then in the gap between the read and the CAS, a
4396      different thread writes an undefined (and different) value at
4397      the location.  Then the CAS in this thread will fail and we will
4398      go back to "again:", but without knowing that the trip back
4399      there was based on an undefined comparison.  No matter; at least
4400      the other thread won the race and the location is correctly
4401      marked as undefined.  What if it wrote an uninitialised version
4402      of the same value that was there originally, though?
4403
4404      etc etc.  Seems like there's a small corner case in which we
4405      might lose the fact that something's defined -- we're out-raced
4406      in between the "old = * reg" and the "atomically {", _and_ the
4407      other thread is writing in an undefined version of what's
4408      already there.  Well, that seems pretty unlikely.
4409
4410      ---
4411
4412      If we ever need to reinstate it .. code which generates a
4413      definedness test for "expected == old" was removed at r10432 of
4414      this file.
4415   */
4416   if (cas->oldHi == IRTemp_INVALID) {
4417      do_shadow_CAS_single( mce, cas );
4418   } else {
4419      do_shadow_CAS_double( mce, cas );
4420   }
4421}
4422
4423
4424static void do_shadow_CAS_single ( MCEnv* mce, IRCAS* cas )
4425{
4426   IRAtom *vdataLo = NULL, *bdataLo = NULL;
4427   IRAtom *vexpdLo = NULL, *bexpdLo = NULL;
4428   IRAtom *voldLo  = NULL, *boldLo  = NULL;
4429   IRAtom *expd_eq_old = NULL;
4430   IROp   opCasCmpEQ;
4431   Int    elemSzB;
4432   IRType elemTy;
4433   Bool   otrak = MC_(clo_mc_level) >= 3; /* a shorthand */
4434
4435   /* single CAS */
4436   tl_assert(cas->oldHi == IRTemp_INVALID);
4437   tl_assert(cas->expdHi == NULL);
4438   tl_assert(cas->dataHi == NULL);
4439
4440   elemTy = typeOfIRExpr(mce->sb->tyenv, cas->expdLo);
4441   switch (elemTy) {
4442      case Ity_I8:  elemSzB = 1; opCasCmpEQ = Iop_CasCmpEQ8;  break;
4443      case Ity_I16: elemSzB = 2; opCasCmpEQ = Iop_CasCmpEQ16; break;
4444      case Ity_I32: elemSzB = 4; opCasCmpEQ = Iop_CasCmpEQ32; break;
4445      case Ity_I64: elemSzB = 8; opCasCmpEQ = Iop_CasCmpEQ64; break;
4446      default: tl_assert(0); /* IR defn disallows any other types */
4447   }
4448
4449   /* 1. fetch data# (the proposed new value) */
4450   tl_assert(isOriginalAtom(mce, cas->dataLo));
4451   vdataLo
4452      = assignNew('V', mce, elemTy, expr2vbits(mce, cas->dataLo));
4453   tl_assert(isShadowAtom(mce, vdataLo));
4454   if (otrak) {
4455      bdataLo
4456         = assignNew('B', mce, Ity_I32, schemeE(mce, cas->dataLo));
4457      tl_assert(isShadowAtom(mce, bdataLo));
4458   }
4459
4460   /* 2. fetch expected# (what we expect to see at the address) */
4461   tl_assert(isOriginalAtom(mce, cas->expdLo));
4462   vexpdLo
4463      = assignNew('V', mce, elemTy, expr2vbits(mce, cas->expdLo));
4464   tl_assert(isShadowAtom(mce, vexpdLo));
4465   if (otrak) {
4466      bexpdLo
4467         = assignNew('B', mce, Ity_I32, schemeE(mce, cas->expdLo));
4468      tl_assert(isShadowAtom(mce, bexpdLo));
4469   }
4470
4471   /* 3. check definedness of address */
4472   /* 4. fetch old# from shadow memory; this also checks
4473         addressibility of the address */
4474   voldLo
4475      = assignNew(
4476           'V', mce, elemTy,
4477           expr2vbits_Load(
4478              mce,
4479              cas->end, elemTy, cas->addr, 0/*Addr bias*/
4480        ));
4481   bind_shadow_tmp_to_orig('V', mce, mkexpr(cas->oldLo), voldLo);
4482   if (otrak) {
4483      boldLo
4484         = assignNew('B', mce, Ity_I32,
4485                     gen_load_b(mce, elemSzB, cas->addr, 0/*addr bias*/));
4486      bind_shadow_tmp_to_orig('B', mce, mkexpr(cas->oldLo), boldLo);
4487   }
4488
4489   /* 5. the CAS itself */
4490   stmt( 'C', mce, IRStmt_CAS(cas) );
4491
4492   /* 6. compute "expected == old" */
4493   /* See COMMENT_ON_CasCmpEQ in this file background/rationale. */
4494   /* Note that 'C' is kinda faking it; it is indeed a non-shadow
4495      tree, but it's not copied from the input block. */
4496   expd_eq_old
4497      = assignNew('C', mce, Ity_I1,
4498                  binop(opCasCmpEQ, cas->expdLo, mkexpr(cas->oldLo)));
4499
4500   /* 7. if "expected == old"
4501            store data# to shadow memory */
4502   do_shadow_Store( mce, cas->end, cas->addr, 0/*bias*/,
4503                    NULL/*data*/, vdataLo/*vdata*/,
4504                    expd_eq_old/*guard for store*/ );
4505   if (otrak) {
4506      gen_store_b( mce, elemSzB, cas->addr, 0/*offset*/,
4507                   bdataLo/*bdata*/,
4508                   expd_eq_old/*guard for store*/ );
4509   }
4510}
4511
4512
4513static void do_shadow_CAS_double ( MCEnv* mce, IRCAS* cas )
4514{
4515   IRAtom *vdataHi = NULL, *bdataHi = NULL;
4516   IRAtom *vdataLo = NULL, *bdataLo = NULL;
4517   IRAtom *vexpdHi = NULL, *bexpdHi = NULL;
4518   IRAtom *vexpdLo = NULL, *bexpdLo = NULL;
4519   IRAtom *voldHi  = NULL, *boldHi  = NULL;
4520   IRAtom *voldLo  = NULL, *boldLo  = NULL;
4521   IRAtom *xHi = NULL, *xLo = NULL, *xHL = NULL;
4522   IRAtom *expd_eq_old = NULL, *zero = NULL;
4523   IROp   opCasCmpEQ, opOr, opXor;
4524   Int    elemSzB, memOffsLo, memOffsHi;
4525   IRType elemTy;
4526   Bool   otrak = MC_(clo_mc_level) >= 3; /* a shorthand */
4527
4528   /* double CAS */
4529   tl_assert(cas->oldHi != IRTemp_INVALID);
4530   tl_assert(cas->expdHi != NULL);
4531   tl_assert(cas->dataHi != NULL);
4532
4533   elemTy = typeOfIRExpr(mce->sb->tyenv, cas->expdLo);
4534   switch (elemTy) {
4535      case Ity_I8:
4536         opCasCmpEQ = Iop_CasCmpEQ8; opOr = Iop_Or8; opXor = Iop_Xor8;
4537         elemSzB = 1; zero = mkU8(0);
4538         break;
4539      case Ity_I16:
4540         opCasCmpEQ = Iop_CasCmpEQ16; opOr = Iop_Or16; opXor = Iop_Xor16;
4541         elemSzB = 2; zero = mkU16(0);
4542         break;
4543      case Ity_I32:
4544         opCasCmpEQ = Iop_CasCmpEQ32; opOr = Iop_Or32; opXor = Iop_Xor32;
4545         elemSzB = 4; zero = mkU32(0);
4546         break;
4547      case Ity_I64:
4548         opCasCmpEQ = Iop_CasCmpEQ64; opOr = Iop_Or64; opXor = Iop_Xor64;
4549         elemSzB = 8; zero = mkU64(0);
4550         break;
4551      default:
4552         tl_assert(0); /* IR defn disallows any other types */
4553   }
4554
4555   /* 1. fetch data# (the proposed new value) */
4556   tl_assert(isOriginalAtom(mce, cas->dataHi));
4557   tl_assert(isOriginalAtom(mce, cas->dataLo));
4558   vdataHi
4559      = assignNew('V', mce, elemTy, expr2vbits(mce, cas->dataHi));
4560   vdataLo
4561      = assignNew('V', mce, elemTy, expr2vbits(mce, cas->dataLo));
4562   tl_assert(isShadowAtom(mce, vdataHi));
4563   tl_assert(isShadowAtom(mce, vdataLo));
4564   if (otrak) {
4565      bdataHi
4566         = assignNew('B', mce, Ity_I32, schemeE(mce, cas->dataHi));
4567      bdataLo
4568         = assignNew('B', mce, Ity_I32, schemeE(mce, cas->dataLo));
4569      tl_assert(isShadowAtom(mce, bdataHi));
4570      tl_assert(isShadowAtom(mce, bdataLo));
4571   }
4572
4573   /* 2. fetch expected# (what we expect to see at the address) */
4574   tl_assert(isOriginalAtom(mce, cas->expdHi));
4575   tl_assert(isOriginalAtom(mce, cas->expdLo));
4576   vexpdHi
4577      = assignNew('V', mce, elemTy, expr2vbits(mce, cas->expdHi));
4578   vexpdLo
4579      = assignNew('V', mce, elemTy, expr2vbits(mce, cas->expdLo));
4580   tl_assert(isShadowAtom(mce, vexpdHi));
4581   tl_assert(isShadowAtom(mce, vexpdLo));
4582   if (otrak) {
4583      bexpdHi
4584         = assignNew('B', mce, Ity_I32, schemeE(mce, cas->expdHi));
4585      bexpdLo
4586         = assignNew('B', mce, Ity_I32, schemeE(mce, cas->expdLo));
4587      tl_assert(isShadowAtom(mce, bexpdHi));
4588      tl_assert(isShadowAtom(mce, bexpdLo));
4589   }
4590
4591   /* 3. check definedness of address */
4592   /* 4. fetch old# from shadow memory; this also checks
4593         addressibility of the address */
4594   if (cas->end == Iend_LE) {
4595      memOffsLo = 0;
4596      memOffsHi = elemSzB;
4597   } else {
4598      tl_assert(cas->end == Iend_BE);
4599      memOffsLo = elemSzB;
4600      memOffsHi = 0;
4601   }
4602   voldHi
4603      = assignNew(
4604           'V', mce, elemTy,
4605           expr2vbits_Load(
4606              mce,
4607              cas->end, elemTy, cas->addr, memOffsHi/*Addr bias*/
4608        ));
4609   voldLo
4610      = assignNew(
4611           'V', mce, elemTy,
4612           expr2vbits_Load(
4613              mce,
4614              cas->end, elemTy, cas->addr, memOffsLo/*Addr bias*/
4615        ));
4616   bind_shadow_tmp_to_orig('V', mce, mkexpr(cas->oldHi), voldHi);
4617   bind_shadow_tmp_to_orig('V', mce, mkexpr(cas->oldLo), voldLo);
4618   if (otrak) {
4619      boldHi
4620         = assignNew('B', mce, Ity_I32,
4621                     gen_load_b(mce, elemSzB, cas->addr,
4622                                memOffsHi/*addr bias*/));
4623      boldLo
4624         = assignNew('B', mce, Ity_I32,
4625                     gen_load_b(mce, elemSzB, cas->addr,
4626                                memOffsLo/*addr bias*/));
4627      bind_shadow_tmp_to_orig('B', mce, mkexpr(cas->oldHi), boldHi);
4628      bind_shadow_tmp_to_orig('B', mce, mkexpr(cas->oldLo), boldLo);
4629   }
4630
4631   /* 5. the CAS itself */
4632   stmt( 'C', mce, IRStmt_CAS(cas) );
4633
4634   /* 6. compute "expected == old" */
4635   /* See COMMENT_ON_CasCmpEQ in this file background/rationale. */
4636   /* Note that 'C' is kinda faking it; it is indeed a non-shadow
4637      tree, but it's not copied from the input block. */
4638   /*
4639      xHi = oldHi ^ expdHi;
4640      xLo = oldLo ^ expdLo;
4641      xHL = xHi | xLo;
4642      expd_eq_old = xHL == 0;
4643   */
4644   xHi = assignNew('C', mce, elemTy,
4645                   binop(opXor, cas->expdHi, mkexpr(cas->oldHi)));
4646   xLo = assignNew('C', mce, elemTy,
4647                   binop(opXor, cas->expdLo, mkexpr(cas->oldLo)));
4648   xHL = assignNew('C', mce, elemTy,
4649                   binop(opOr, xHi, xLo));
4650   expd_eq_old
4651      = assignNew('C', mce, Ity_I1,
4652                  binop(opCasCmpEQ, xHL, zero));
4653
4654   /* 7. if "expected == old"
4655            store data# to shadow memory */
4656   do_shadow_Store( mce, cas->end, cas->addr, memOffsHi/*bias*/,
4657                    NULL/*data*/, vdataHi/*vdata*/,
4658                    expd_eq_old/*guard for store*/ );
4659   do_shadow_Store( mce, cas->end, cas->addr, memOffsLo/*bias*/,
4660                    NULL/*data*/, vdataLo/*vdata*/,
4661                    expd_eq_old/*guard for store*/ );
4662   if (otrak) {
4663      gen_store_b( mce, elemSzB, cas->addr, memOffsHi/*offset*/,
4664                   bdataHi/*bdata*/,
4665                   expd_eq_old/*guard for store*/ );
4666      gen_store_b( mce, elemSzB, cas->addr, memOffsLo/*offset*/,
4667                   bdataLo/*bdata*/,
4668                   expd_eq_old/*guard for store*/ );
4669   }
4670}
4671
4672
4673/* ------ Dealing with LL/SC (not difficult) ------ */
4674
4675static void do_shadow_LLSC ( MCEnv*    mce,
4676                             IREndness stEnd,
4677                             IRTemp    stResult,
4678                             IRExpr*   stAddr,
4679                             IRExpr*   stStoredata )
4680{
4681   /* In short: treat a load-linked like a normal load followed by an
4682      assignment of the loaded (shadow) data to the result temporary.
4683      Treat a store-conditional like a normal store, and mark the
4684      result temporary as defined. */
4685   IRType resTy  = typeOfIRTemp(mce->sb->tyenv, stResult);
4686   IRTemp resTmp = findShadowTmpV(mce, stResult);
4687
4688   tl_assert(isIRAtom(stAddr));
4689   if (stStoredata)
4690      tl_assert(isIRAtom(stStoredata));
4691
4692   if (stStoredata == NULL) {
4693      /* Load Linked */
4694      /* Just treat this as a normal load, followed by an assignment of
4695         the value to .result. */
4696      /* Stay sane */
4697      tl_assert(resTy == Ity_I64 || resTy == Ity_I32
4698                || resTy == Ity_I16 || resTy == Ity_I8);
4699      assign( 'V', mce, resTmp,
4700                   expr2vbits_Load(
4701                      mce, stEnd, resTy, stAddr, 0/*addr bias*/));
4702   } else {
4703      /* Store Conditional */
4704      /* Stay sane */
4705      IRType dataTy = typeOfIRExpr(mce->sb->tyenv,
4706                                   stStoredata);
4707      tl_assert(dataTy == Ity_I64 || dataTy == Ity_I32
4708                || dataTy == Ity_I16 || dataTy == Ity_I8);
4709      do_shadow_Store( mce, stEnd,
4710                            stAddr, 0/* addr bias */,
4711                            stStoredata,
4712                            NULL /* shadow data */,
4713                            NULL/*guard*/ );
4714      /* This is a store conditional, so it writes to .result a value
4715         indicating whether or not the store succeeded.  Just claim
4716         this value is always defined.  In the PowerPC interpretation
4717         of store-conditional, definedness of the success indication
4718         depends on whether the address of the store matches the
4719         reservation address.  But we can't tell that here (and
4720         anyway, we're not being PowerPC-specific).  At least we are
4721         guaranteed that the definedness of the store address, and its
4722         addressibility, will be checked as per normal.  So it seems
4723         pretty safe to just say that the success indication is always
4724         defined.
4725
4726         In schemeS, for origin tracking, we must correspondingly set
4727         a no-origin value for the origin shadow of .result.
4728      */
4729      tl_assert(resTy == Ity_I1);
4730      assign( 'V', mce, resTmp, definedOfType(resTy) );
4731   }
4732}
4733
4734
4735/*------------------------------------------------------------*/
4736/*--- Memcheck main                                        ---*/
4737/*------------------------------------------------------------*/
4738
4739static void schemeS ( MCEnv* mce, IRStmt* st );
4740
4741static Bool isBogusAtom ( IRAtom* at )
4742{
4743   ULong n = 0;
4744   IRConst* con;
4745   tl_assert(isIRAtom(at));
4746   if (at->tag == Iex_RdTmp)
4747      return False;
4748   tl_assert(at->tag == Iex_Const);
4749   con = at->Iex.Const.con;
4750   switch (con->tag) {
4751      case Ico_U1:   return False;
4752      case Ico_U8:   n = (ULong)con->Ico.U8; break;
4753      case Ico_U16:  n = (ULong)con->Ico.U16; break;
4754      case Ico_U32:  n = (ULong)con->Ico.U32; break;
4755      case Ico_U64:  n = (ULong)con->Ico.U64; break;
4756      case Ico_F64:  return False;
4757      case Ico_F32i: return False;
4758      case Ico_F64i: return False;
4759      case Ico_V128: return False;
4760      default: ppIRExpr(at); tl_assert(0);
4761   }
4762   /* VG_(printf)("%llx\n", n); */
4763   return (/*32*/    n == 0xFEFEFEFFULL
4764           /*32*/ || n == 0x80808080ULL
4765           /*32*/ || n == 0x7F7F7F7FULL
4766           /*64*/ || n == 0xFFFFFFFFFEFEFEFFULL
4767           /*64*/ || n == 0xFEFEFEFEFEFEFEFFULL
4768           /*64*/ || n == 0x0000000000008080ULL
4769           /*64*/ || n == 0x8080808080808080ULL
4770           /*64*/ || n == 0x0101010101010101ULL
4771          );
4772}
4773
4774static Bool checkForBogusLiterals ( /*FLAT*/ IRStmt* st )
4775{
4776   Int      i;
4777   IRExpr*  e;
4778   IRDirty* d;
4779   IRCAS*   cas;
4780   switch (st->tag) {
4781      case Ist_WrTmp:
4782         e = st->Ist.WrTmp.data;
4783         switch (e->tag) {
4784            case Iex_Get:
4785            case Iex_RdTmp:
4786               return False;
4787            case Iex_Const:
4788               return isBogusAtom(e);
4789            case Iex_Unop:
4790               return isBogusAtom(e->Iex.Unop.arg);
4791            case Iex_GetI:
4792               return isBogusAtom(e->Iex.GetI.ix);
4793            case Iex_Binop:
4794               return isBogusAtom(e->Iex.Binop.arg1)
4795                      || isBogusAtom(e->Iex.Binop.arg2);
4796            case Iex_Triop:
4797               return isBogusAtom(e->Iex.Triop.arg1)
4798                      || isBogusAtom(e->Iex.Triop.arg2)
4799                      || isBogusAtom(e->Iex.Triop.arg3);
4800            case Iex_Qop:
4801               return isBogusAtom(e->Iex.Qop.arg1)
4802                      || isBogusAtom(e->Iex.Qop.arg2)
4803                      || isBogusAtom(e->Iex.Qop.arg3)
4804                      || isBogusAtom(e->Iex.Qop.arg4);
4805            case Iex_Mux0X:
4806               return isBogusAtom(e->Iex.Mux0X.cond)
4807                      || isBogusAtom(e->Iex.Mux0X.expr0)
4808                      || isBogusAtom(e->Iex.Mux0X.exprX);
4809            case Iex_Load:
4810               return isBogusAtom(e->Iex.Load.addr);
4811            case Iex_CCall:
4812               for (i = 0; e->Iex.CCall.args[i]; i++)
4813                  if (isBogusAtom(e->Iex.CCall.args[i]))
4814                     return True;
4815               return False;
4816            default:
4817               goto unhandled;
4818         }
4819      case Ist_Dirty:
4820         d = st->Ist.Dirty.details;
4821         for (i = 0; d->args[i]; i++)
4822            if (isBogusAtom(d->args[i]))
4823               return True;
4824         if (d->guard && isBogusAtom(d->guard))
4825            return True;
4826         if (d->mAddr && isBogusAtom(d->mAddr))
4827            return True;
4828         return False;
4829      case Ist_Put:
4830         return isBogusAtom(st->Ist.Put.data);
4831      case Ist_PutI:
4832         return isBogusAtom(st->Ist.PutI.ix)
4833                || isBogusAtom(st->Ist.PutI.data);
4834      case Ist_Store:
4835         return isBogusAtom(st->Ist.Store.addr)
4836                || isBogusAtom(st->Ist.Store.data);
4837      case Ist_Exit:
4838         return isBogusAtom(st->Ist.Exit.guard);
4839      case Ist_AbiHint:
4840         return isBogusAtom(st->Ist.AbiHint.base)
4841                || isBogusAtom(st->Ist.AbiHint.nia);
4842      case Ist_NoOp:
4843      case Ist_IMark:
4844      case Ist_MBE:
4845         return False;
4846      case Ist_CAS:
4847         cas = st->Ist.CAS.details;
4848         return isBogusAtom(cas->addr)
4849                || (cas->expdHi ? isBogusAtom(cas->expdHi) : False)
4850                || isBogusAtom(cas->expdLo)
4851                || (cas->dataHi ? isBogusAtom(cas->dataHi) : False)
4852                || isBogusAtom(cas->dataLo);
4853      case Ist_LLSC:
4854         return isBogusAtom(st->Ist.LLSC.addr)
4855                || (st->Ist.LLSC.storedata
4856                       ? isBogusAtom(st->Ist.LLSC.storedata)
4857                       : False);
4858      default:
4859      unhandled:
4860         ppIRStmt(st);
4861         VG_(tool_panic)("hasBogusLiterals");
4862   }
4863}
4864
4865
4866IRSB* MC_(instrument) ( VgCallbackClosure* closure,
4867                        IRSB* sb_in,
4868                        VexGuestLayout* layout,
4869                        VexGuestExtents* vge,
4870                        IRType gWordTy, IRType hWordTy )
4871{
4872   Bool    verboze = 0||False;
4873   Bool    bogus;
4874   Int     i, j, first_stmt;
4875   IRStmt* st;
4876   MCEnv   mce;
4877   IRSB*   sb_out;
4878
4879   if (gWordTy != hWordTy) {
4880      /* We don't currently support this case. */
4881      VG_(tool_panic)("host/guest word size mismatch");
4882   }
4883
4884   /* Check we're not completely nuts */
4885   tl_assert(sizeof(UWord)  == sizeof(void*));
4886   tl_assert(sizeof(Word)   == sizeof(void*));
4887   tl_assert(sizeof(Addr)   == sizeof(void*));
4888   tl_assert(sizeof(ULong)  == 8);
4889   tl_assert(sizeof(Long)   == 8);
4890   tl_assert(sizeof(Addr64) == 8);
4891   tl_assert(sizeof(UInt)   == 4);
4892   tl_assert(sizeof(Int)    == 4);
4893
4894   tl_assert(MC_(clo_mc_level) >= 1 && MC_(clo_mc_level) <= 3);
4895
4896   /* Set up SB */
4897   sb_out = deepCopyIRSBExceptStmts(sb_in);
4898
4899   /* Set up the running environment.  Both .sb and .tmpMap are
4900      modified as we go along.  Note that tmps are added to both
4901      .sb->tyenv and .tmpMap together, so the valid index-set for
4902      those two arrays should always be identical. */
4903   VG_(memset)(&mce, 0, sizeof(mce));
4904   mce.sb             = sb_out;
4905   mce.trace          = verboze;
4906   mce.layout         = layout;
4907   mce.hWordTy        = hWordTy;
4908   mce.bogusLiterals  = False;
4909
4910   mce.tmpMap = VG_(newXA)( VG_(malloc), "mc.MC_(instrument).1", VG_(free),
4911                            sizeof(TempMapEnt));
4912   for (i = 0; i < sb_in->tyenv->types_used; i++) {
4913      TempMapEnt ent;
4914      ent.kind    = Orig;
4915      ent.shadowV = IRTemp_INVALID;
4916      ent.shadowB = IRTemp_INVALID;
4917      VG_(addToXA)( mce.tmpMap, &ent );
4918   }
4919   tl_assert( VG_(sizeXA)( mce.tmpMap ) == sb_in->tyenv->types_used );
4920
4921   /* Make a preliminary inspection of the statements, to see if there
4922      are any dodgy-looking literals.  If there are, we generate
4923      extra-detailed (hence extra-expensive) instrumentation in
4924      places.  Scan the whole bb even if dodgyness is found earlier,
4925      so that the flatness assertion is applied to all stmts. */
4926
4927   bogus = False;
4928
4929   for (i = 0; i < sb_in->stmts_used; i++) {
4930
4931      st = sb_in->stmts[i];
4932      tl_assert(st);
4933      tl_assert(isFlatIRStmt(st));
4934
4935      if (!bogus) {
4936         bogus = checkForBogusLiterals(st);
4937         if (0 && bogus) {
4938            VG_(printf)("bogus: ");
4939            ppIRStmt(st);
4940            VG_(printf)("\n");
4941         }
4942      }
4943
4944   }
4945
4946   mce.bogusLiterals = bogus;
4947
4948   /* Copy verbatim any IR preamble preceding the first IMark */
4949
4950   tl_assert(mce.sb == sb_out);
4951   tl_assert(mce.sb != sb_in);
4952
4953   i = 0;
4954   while (i < sb_in->stmts_used && sb_in->stmts[i]->tag != Ist_IMark) {
4955
4956      st = sb_in->stmts[i];
4957      tl_assert(st);
4958      tl_assert(isFlatIRStmt(st));
4959
4960      stmt( 'C', &mce, sb_in->stmts[i] );
4961      i++;
4962   }
4963
4964   /* Nasty problem.  IR optimisation of the pre-instrumented IR may
4965      cause the IR following the preamble to contain references to IR
4966      temporaries defined in the preamble.  Because the preamble isn't
4967      instrumented, these temporaries don't have any shadows.
4968      Nevertheless uses of them following the preamble will cause
4969      memcheck to generate references to their shadows.  End effect is
4970      to cause IR sanity check failures, due to references to
4971      non-existent shadows.  This is only evident for the complex
4972      preambles used for function wrapping on TOC-afflicted platforms
4973      (ppc64-linux).
4974
4975      The following loop therefore scans the preamble looking for
4976      assignments to temporaries.  For each one found it creates an
4977      assignment to the corresponding (V) shadow temp, marking it as
4978      'defined'.  This is the same resulting IR as if the main
4979      instrumentation loop before had been applied to the statement
4980      'tmp = CONSTANT'.
4981
4982      Similarly, if origin tracking is enabled, we must generate an
4983      assignment for the corresponding origin (B) shadow, claiming
4984      no-origin, as appropriate for a defined value.
4985   */
4986   for (j = 0; j < i; j++) {
4987      if (sb_in->stmts[j]->tag == Ist_WrTmp) {
4988         /* findShadowTmpV checks its arg is an original tmp;
4989            no need to assert that here. */
4990         IRTemp tmp_o = sb_in->stmts[j]->Ist.WrTmp.tmp;
4991         IRTemp tmp_v = findShadowTmpV(&mce, tmp_o);
4992         IRType ty_v  = typeOfIRTemp(sb_out->tyenv, tmp_v);
4993         assign( 'V', &mce, tmp_v, definedOfType( ty_v ) );
4994         if (MC_(clo_mc_level) == 3) {
4995            IRTemp tmp_b = findShadowTmpB(&mce, tmp_o);
4996            tl_assert(typeOfIRTemp(sb_out->tyenv, tmp_b) == Ity_I32);
4997            assign( 'B', &mce, tmp_b, mkU32(0)/* UNKNOWN ORIGIN */);
4998         }
4999         if (0) {
5000            VG_(printf)("create shadow tmp(s) for preamble tmp [%d] ty ", j);
5001            ppIRType( ty_v );
5002            VG_(printf)("\n");
5003         }
5004      }
5005   }
5006
5007   /* Iterate over the remaining stmts to generate instrumentation. */
5008
5009   tl_assert(sb_in->stmts_used > 0);
5010   tl_assert(i >= 0);
5011   tl_assert(i < sb_in->stmts_used);
5012   tl_assert(sb_in->stmts[i]->tag == Ist_IMark);
5013
5014   for (/* use current i*/; i < sb_in->stmts_used; i++) {
5015
5016      st = sb_in->stmts[i];
5017      first_stmt = sb_out->stmts_used;
5018
5019      if (verboze) {
5020         VG_(printf)("\n");
5021         ppIRStmt(st);
5022         VG_(printf)("\n");
5023      }
5024
5025      if (MC_(clo_mc_level) == 3) {
5026         /* See comments on case Ist_CAS below. */
5027         if (st->tag != Ist_CAS)
5028            schemeS( &mce, st );
5029      }
5030
5031      /* Generate instrumentation code for each stmt ... */
5032
5033      switch (st->tag) {
5034
5035         case Ist_WrTmp:
5036            assign( 'V', &mce, findShadowTmpV(&mce, st->Ist.WrTmp.tmp),
5037                               expr2vbits( &mce, st->Ist.WrTmp.data) );
5038            break;
5039
5040         case Ist_Put:
5041            do_shadow_PUT( &mce,
5042                           st->Ist.Put.offset,
5043                           st->Ist.Put.data,
5044                           NULL /* shadow atom */ );
5045            break;
5046
5047         case Ist_PutI:
5048            do_shadow_PUTI( &mce,
5049                            st->Ist.PutI.descr,
5050                            st->Ist.PutI.ix,
5051                            st->Ist.PutI.bias,
5052                            st->Ist.PutI.data );
5053            break;
5054
5055         case Ist_Store:
5056            do_shadow_Store( &mce, st->Ist.Store.end,
5057                                   st->Ist.Store.addr, 0/* addr bias */,
5058                                   st->Ist.Store.data,
5059                                   NULL /* shadow data */,
5060                                   NULL/*guard*/ );
5061            break;
5062
5063         case Ist_Exit:
5064            complainIfUndefined( &mce, st->Ist.Exit.guard );
5065            break;
5066
5067         case Ist_IMark:
5068            break;
5069
5070         case Ist_NoOp:
5071         case Ist_MBE:
5072            break;
5073
5074         case Ist_Dirty:
5075            do_shadow_Dirty( &mce, st->Ist.Dirty.details );
5076            break;
5077
5078         case Ist_AbiHint:
5079            do_AbiHint( &mce, st->Ist.AbiHint.base,
5080                              st->Ist.AbiHint.len,
5081                              st->Ist.AbiHint.nia );
5082            break;
5083
5084         case Ist_CAS:
5085            do_shadow_CAS( &mce, st->Ist.CAS.details );
5086            /* Note, do_shadow_CAS copies the CAS itself to the output
5087               block, because it needs to add instrumentation both
5088               before and after it.  Hence skip the copy below.  Also
5089               skip the origin-tracking stuff (call to schemeS) above,
5090               since that's all tangled up with it too; do_shadow_CAS
5091               does it all. */
5092            break;
5093
5094         case Ist_LLSC:
5095            do_shadow_LLSC( &mce,
5096                            st->Ist.LLSC.end,
5097                            st->Ist.LLSC.result,
5098                            st->Ist.LLSC.addr,
5099                            st->Ist.LLSC.storedata );
5100            break;
5101
5102         default:
5103            VG_(printf)("\n");
5104            ppIRStmt(st);
5105            VG_(printf)("\n");
5106            VG_(tool_panic)("memcheck: unhandled IRStmt");
5107
5108      } /* switch (st->tag) */
5109
5110      if (0 && verboze) {
5111         for (j = first_stmt; j < sb_out->stmts_used; j++) {
5112            VG_(printf)("   ");
5113            ppIRStmt(sb_out->stmts[j]);
5114            VG_(printf)("\n");
5115         }
5116         VG_(printf)("\n");
5117      }
5118
5119      /* ... and finally copy the stmt itself to the output.  Except,
5120         skip the copy of IRCASs; see comments on case Ist_CAS
5121         above. */
5122      if (st->tag != Ist_CAS)
5123         stmt('C', &mce, st);
5124   }
5125
5126   /* Now we need to complain if the jump target is undefined. */
5127   first_stmt = sb_out->stmts_used;
5128
5129   if (verboze) {
5130      VG_(printf)("sb_in->next = ");
5131      ppIRExpr(sb_in->next);
5132      VG_(printf)("\n\n");
5133   }
5134
5135   complainIfUndefined( &mce, sb_in->next );
5136
5137   if (0 && verboze) {
5138      for (j = first_stmt; j < sb_out->stmts_used; j++) {
5139         VG_(printf)("   ");
5140         ppIRStmt(sb_out->stmts[j]);
5141         VG_(printf)("\n");
5142      }
5143      VG_(printf)("\n");
5144   }
5145
5146   /* If this fails, there's been some serious snafu with tmp management,
5147      that should be investigated. */
5148   tl_assert( VG_(sizeXA)( mce.tmpMap ) == mce.sb->tyenv->types_used );
5149   VG_(deleteXA)( mce.tmpMap );
5150
5151   tl_assert(mce.sb == sb_out);
5152   return sb_out;
5153}
5154
5155/*------------------------------------------------------------*/
5156/*--- Post-tree-build final tidying                        ---*/
5157/*------------------------------------------------------------*/
5158
5159/* This exploits the observation that Memcheck often produces
5160   repeated conditional calls of the form
5161
5162   Dirty G MC_(helperc_value_check0/1/4/8_fail)(UInt otag)
5163
5164   with the same guard expression G guarding the same helper call.
5165   The second and subsequent calls are redundant.  This usually
5166   results from instrumentation of guest code containing multiple
5167   memory references at different constant offsets from the same base
5168   register.  After optimisation of the instrumentation, you get a
5169   test for the definedness of the base register for each memory
5170   reference, which is kinda pointless.  MC_(final_tidy) therefore
5171   looks for such repeated calls and removes all but the first. */
5172
5173/* A struct for recording which (helper, guard) pairs we have already
5174   seen. */
5175typedef
5176   struct { void* entry; IRExpr* guard; }
5177   Pair;
5178
5179/* Return True if e1 and e2 definitely denote the same value (used to
5180   compare guards).  Return False if unknown; False is the safe
5181   answer.  Since guest registers and guest memory do not have the
5182   SSA property we must return False if any Gets or Loads appear in
5183   the expression. */
5184
5185static Bool sameIRValue ( IRExpr* e1, IRExpr* e2 )
5186{
5187   if (e1->tag != e2->tag)
5188      return False;
5189   switch (e1->tag) {
5190      case Iex_Const:
5191         return eqIRConst( e1->Iex.Const.con, e2->Iex.Const.con );
5192      case Iex_Binop:
5193         return e1->Iex.Binop.op == e2->Iex.Binop.op
5194                && sameIRValue(e1->Iex.Binop.arg1, e2->Iex.Binop.arg1)
5195                && sameIRValue(e1->Iex.Binop.arg2, e2->Iex.Binop.arg2);
5196      case Iex_Unop:
5197         return e1->Iex.Unop.op == e2->Iex.Unop.op
5198                && sameIRValue(e1->Iex.Unop.arg, e2->Iex.Unop.arg);
5199      case Iex_RdTmp:
5200         return e1->Iex.RdTmp.tmp == e2->Iex.RdTmp.tmp;
5201      case Iex_Mux0X:
5202         return sameIRValue( e1->Iex.Mux0X.cond, e2->Iex.Mux0X.cond )
5203                && sameIRValue( e1->Iex.Mux0X.expr0, e2->Iex.Mux0X.expr0 )
5204                && sameIRValue( e1->Iex.Mux0X.exprX, e2->Iex.Mux0X.exprX );
5205      case Iex_Qop:
5206      case Iex_Triop:
5207      case Iex_CCall:
5208         /* be lazy.  Could define equality for these, but they never
5209            appear to be used. */
5210         return False;
5211      case Iex_Get:
5212      case Iex_GetI:
5213      case Iex_Load:
5214         /* be conservative - these may not give the same value each
5215            time */
5216         return False;
5217      case Iex_Binder:
5218         /* should never see this */
5219         /* fallthrough */
5220      default:
5221         VG_(printf)("mc_translate.c: sameIRValue: unhandled: ");
5222         ppIRExpr(e1);
5223         VG_(tool_panic)("memcheck:sameIRValue");
5224         return False;
5225   }
5226}
5227
5228/* See if 'pairs' already has an entry for (entry, guard).  Return
5229   True if so.  If not, add an entry. */
5230
5231static
5232Bool check_or_add ( XArray* /*of Pair*/ pairs, IRExpr* guard, void* entry )
5233{
5234   Pair  p;
5235   Pair* pp;
5236   Int   i, n = VG_(sizeXA)( pairs );
5237   for (i = 0; i < n; i++) {
5238      pp = VG_(indexXA)( pairs, i );
5239      if (pp->entry == entry && sameIRValue(pp->guard, guard))
5240         return True;
5241   }
5242   p.guard = guard;
5243   p.entry = entry;
5244   VG_(addToXA)( pairs, &p );
5245   return False;
5246}
5247
5248static Bool is_helperc_value_checkN_fail ( HChar* name )
5249{
5250   return
5251      0==VG_(strcmp)(name, "MC_(helperc_value_check0_fail_no_o)")
5252      || 0==VG_(strcmp)(name, "MC_(helperc_value_check1_fail_no_o)")
5253      || 0==VG_(strcmp)(name, "MC_(helperc_value_check4_fail_no_o)")
5254      || 0==VG_(strcmp)(name, "MC_(helperc_value_check8_fail_no_o)")
5255      || 0==VG_(strcmp)(name, "MC_(helperc_value_check0_fail_w_o)")
5256      || 0==VG_(strcmp)(name, "MC_(helperc_value_check1_fail_w_o)")
5257      || 0==VG_(strcmp)(name, "MC_(helperc_value_check4_fail_w_o)")
5258      || 0==VG_(strcmp)(name, "MC_(helperc_value_check8_fail_w_o)");
5259}
5260
5261IRSB* MC_(final_tidy) ( IRSB* sb_in )
5262{
5263   Int i;
5264   IRStmt*   st;
5265   IRDirty*  di;
5266   IRExpr*   guard;
5267   IRCallee* cee;
5268   Bool      alreadyPresent;
5269   XArray*   pairs = VG_(newXA)( VG_(malloc), "mc.ft.1",
5270                                 VG_(free), sizeof(Pair) );
5271   /* Scan forwards through the statements.  Each time a call to one
5272      of the relevant helpers is seen, check if we have made a
5273      previous call to the same helper using the same guard
5274      expression, and if so, delete the call. */
5275   for (i = 0; i < sb_in->stmts_used; i++) {
5276      st = sb_in->stmts[i];
5277      tl_assert(st);
5278      if (st->tag != Ist_Dirty)
5279         continue;
5280      di = st->Ist.Dirty.details;
5281      guard = di->guard;
5282      if (!guard)
5283         continue;
5284      if (0) { ppIRExpr(guard); VG_(printf)("\n"); }
5285      cee = di->cee;
5286      if (!is_helperc_value_checkN_fail( cee->name ))
5287         continue;
5288       /* Ok, we have a call to helperc_value_check0/1/4/8_fail with
5289          guard 'guard'.  Check if we have already seen a call to this
5290          function with the same guard.  If so, delete it.  If not,
5291          add it to the set of calls we do know about. */
5292      alreadyPresent = check_or_add( pairs, guard, cee->addr );
5293      if (alreadyPresent) {
5294         sb_in->stmts[i] = IRStmt_NoOp();
5295         if (0) VG_(printf)("XX\n");
5296      }
5297   }
5298   VG_(deleteXA)( pairs );
5299   return sb_in;
5300}
5301
5302
5303/*------------------------------------------------------------*/
5304/*--- Origin tracking stuff                                ---*/
5305/*------------------------------------------------------------*/
5306
5307/* Almost identical to findShadowTmpV. */
5308static IRTemp findShadowTmpB ( MCEnv* mce, IRTemp orig )
5309{
5310   TempMapEnt* ent;
5311   /* VG_(indexXA) range-checks 'orig', hence no need to check
5312      here. */
5313   ent = (TempMapEnt*)VG_(indexXA)( mce->tmpMap, (Word)orig );
5314   tl_assert(ent->kind == Orig);
5315   if (ent->shadowB == IRTemp_INVALID) {
5316      IRTemp tmpB
5317        = newTemp( mce, Ity_I32, BSh );
5318      /* newTemp may cause mce->tmpMap to resize, hence previous results
5319         from VG_(indexXA) are invalid. */
5320      ent = (TempMapEnt*)VG_(indexXA)( mce->tmpMap, (Word)orig );
5321      tl_assert(ent->kind == Orig);
5322      tl_assert(ent->shadowB == IRTemp_INVALID);
5323      ent->shadowB = tmpB;
5324   }
5325   return ent->shadowB;
5326}
5327
5328static IRAtom* gen_maxU32 ( MCEnv* mce, IRAtom* b1, IRAtom* b2 )
5329{
5330   return assignNew( 'B', mce, Ity_I32, binop(Iop_Max32U, b1, b2) );
5331}
5332
5333static IRAtom* gen_load_b ( MCEnv* mce, Int szB,
5334                            IRAtom* baseaddr, Int offset )
5335{
5336   void*    hFun;
5337   HChar*   hName;
5338   IRTemp   bTmp;
5339   IRDirty* di;
5340   IRType   aTy   = typeOfIRExpr( mce->sb->tyenv, baseaddr );
5341   IROp     opAdd = aTy == Ity_I32 ? Iop_Add32 : Iop_Add64;
5342   IRAtom*  ea    = baseaddr;
5343   if (offset != 0) {
5344      IRAtom* off = aTy == Ity_I32 ? mkU32( offset )
5345                                   : mkU64( (Long)(Int)offset );
5346      ea = assignNew( 'B', mce, aTy, binop(opAdd, ea, off));
5347   }
5348   bTmp = newTemp(mce, mce->hWordTy, BSh);
5349
5350   switch (szB) {
5351      case 1: hFun  = (void*)&MC_(helperc_b_load1);
5352              hName = "MC_(helperc_b_load1)";
5353              break;
5354      case 2: hFun  = (void*)&MC_(helperc_b_load2);
5355              hName = "MC_(helperc_b_load2)";
5356              break;
5357      case 4: hFun  = (void*)&MC_(helperc_b_load4);
5358              hName = "MC_(helperc_b_load4)";
5359              break;
5360      case 8: hFun  = (void*)&MC_(helperc_b_load8);
5361              hName = "MC_(helperc_b_load8)";
5362              break;
5363      case 16: hFun  = (void*)&MC_(helperc_b_load16);
5364               hName = "MC_(helperc_b_load16)";
5365               break;
5366      default:
5367         VG_(printf)("mc_translate.c: gen_load_b: unhandled szB == %d\n", szB);
5368         tl_assert(0);
5369   }
5370   di = unsafeIRDirty_1_N(
5371           bTmp, 1/*regparms*/, hName, VG_(fnptr_to_fnentry)( hFun ),
5372           mkIRExprVec_1( ea )
5373        );
5374   /* no need to mess with any annotations.  This call accesses
5375      neither guest state nor guest memory. */
5376   stmt( 'B', mce, IRStmt_Dirty(di) );
5377   if (mce->hWordTy == Ity_I64) {
5378      /* 64-bit host */
5379      IRTemp bTmp32 = newTemp(mce, Ity_I32, BSh);
5380      assign( 'B', mce, bTmp32, unop(Iop_64to32, mkexpr(bTmp)) );
5381      return mkexpr(bTmp32);
5382   } else {
5383      /* 32-bit host */
5384      return mkexpr(bTmp);
5385   }
5386}
5387
5388/* Generate a shadow store.  guard :: Ity_I1 controls whether the
5389   store really happens; NULL means it unconditionally does. */
5390static void gen_store_b ( MCEnv* mce, Int szB,
5391                          IRAtom* baseaddr, Int offset, IRAtom* dataB,
5392                          IRAtom* guard )
5393{
5394   void*    hFun;
5395   HChar*   hName;
5396   IRDirty* di;
5397   IRType   aTy   = typeOfIRExpr( mce->sb->tyenv, baseaddr );
5398   IROp     opAdd = aTy == Ity_I32 ? Iop_Add32 : Iop_Add64;
5399   IRAtom*  ea    = baseaddr;
5400   if (guard) {
5401      tl_assert(isOriginalAtom(mce, guard));
5402      tl_assert(typeOfIRExpr(mce->sb->tyenv, guard) == Ity_I1);
5403   }
5404   if (offset != 0) {
5405      IRAtom* off = aTy == Ity_I32 ? mkU32( offset )
5406                                   : mkU64( (Long)(Int)offset );
5407      ea = assignNew(  'B', mce, aTy, binop(opAdd, ea, off));
5408   }
5409   if (mce->hWordTy == Ity_I64)
5410      dataB = assignNew( 'B', mce, Ity_I64, unop(Iop_32Uto64, dataB));
5411
5412   switch (szB) {
5413      case 1: hFun  = (void*)&MC_(helperc_b_store1);
5414              hName = "MC_(helperc_b_store1)";
5415              break;
5416      case 2: hFun  = (void*)&MC_(helperc_b_store2);
5417              hName = "MC_(helperc_b_store2)";
5418              break;
5419      case 4: hFun  = (void*)&MC_(helperc_b_store4);
5420              hName = "MC_(helperc_b_store4)";
5421              break;
5422      case 8: hFun  = (void*)&MC_(helperc_b_store8);
5423              hName = "MC_(helperc_b_store8)";
5424              break;
5425      case 16: hFun  = (void*)&MC_(helperc_b_store16);
5426               hName = "MC_(helperc_b_store16)";
5427               break;
5428      default:
5429         tl_assert(0);
5430   }
5431   di = unsafeIRDirty_0_N( 2/*regparms*/,
5432           hName, VG_(fnptr_to_fnentry)( hFun ),
5433           mkIRExprVec_2( ea, dataB )
5434        );
5435   /* no need to mess with any annotations.  This call accesses
5436      neither guest state nor guest memory. */
5437   if (guard) di->guard = guard;
5438   stmt( 'B', mce, IRStmt_Dirty(di) );
5439}
5440
5441static IRAtom* narrowTo32 ( MCEnv* mce, IRAtom* e ) {
5442   IRType eTy = typeOfIRExpr(mce->sb->tyenv, e);
5443   if (eTy == Ity_I64)
5444      return assignNew( 'B', mce, Ity_I32, unop(Iop_64to32, e) );
5445   if (eTy == Ity_I32)
5446      return e;
5447   tl_assert(0);
5448}
5449
5450static IRAtom* zWidenFrom32 ( MCEnv* mce, IRType dstTy, IRAtom* e ) {
5451   IRType eTy = typeOfIRExpr(mce->sb->tyenv, e);
5452   tl_assert(eTy == Ity_I32);
5453   if (dstTy == Ity_I64)
5454      return assignNew( 'B', mce, Ity_I64, unop(Iop_32Uto64, e) );
5455   tl_assert(0);
5456}
5457
5458
5459static IRAtom* schemeE ( MCEnv* mce, IRExpr* e )
5460{
5461   tl_assert(MC_(clo_mc_level) == 3);
5462
5463   switch (e->tag) {
5464
5465      case Iex_GetI: {
5466         IRRegArray* descr_b;
5467         IRAtom      *t1, *t2, *t3, *t4;
5468         IRRegArray* descr      = e->Iex.GetI.descr;
5469         IRType equivIntTy
5470            = MC_(get_otrack_reg_array_equiv_int_type)(descr);
5471         /* If this array is unshadowable for whatever reason, use the
5472            usual approximation. */
5473         if (equivIntTy == Ity_INVALID)
5474            return mkU32(0);
5475         tl_assert(sizeofIRType(equivIntTy) >= 4);
5476         tl_assert(sizeofIRType(equivIntTy) == sizeofIRType(descr->elemTy));
5477         descr_b = mkIRRegArray( descr->base + 2*mce->layout->total_sizeB,
5478                                 equivIntTy, descr->nElems );
5479         /* Do a shadow indexed get of the same size, giving t1.  Take
5480            the bottom 32 bits of it, giving t2.  Compute into t3 the
5481            origin for the index (almost certainly zero, but there's
5482            no harm in being completely general here, since iropt will
5483            remove any useless code), and fold it in, giving a final
5484            value t4. */
5485         t1 = assignNew( 'B', mce, equivIntTy,
5486                          IRExpr_GetI( descr_b, e->Iex.GetI.ix,
5487                                                e->Iex.GetI.bias ));
5488         t2 = narrowTo32( mce, t1 );
5489         t3 = schemeE( mce, e->Iex.GetI.ix );
5490         t4 = gen_maxU32( mce, t2, t3 );
5491         return t4;
5492      }
5493      case Iex_CCall: {
5494         Int i;
5495         IRAtom*  here;
5496         IRExpr** args = e->Iex.CCall.args;
5497         IRAtom*  curr = mkU32(0);
5498         for (i = 0; args[i]; i++) {
5499            tl_assert(i < 32);
5500            tl_assert(isOriginalAtom(mce, args[i]));
5501            /* Only take notice of this arg if the callee's
5502               mc-exclusion mask does not say it is to be excluded. */
5503            if (e->Iex.CCall.cee->mcx_mask & (1<<i)) {
5504               /* the arg is to be excluded from definedness checking.
5505                  Do nothing. */
5506               if (0) VG_(printf)("excluding %s(%d)\n",
5507                                  e->Iex.CCall.cee->name, i);
5508            } else {
5509               /* calculate the arg's definedness, and pessimistically
5510                  merge it in. */
5511               here = schemeE( mce, args[i] );
5512               curr = gen_maxU32( mce, curr, here );
5513            }
5514         }
5515         return curr;
5516      }
5517      case Iex_Load: {
5518         Int dszB;
5519         dszB = sizeofIRType(e->Iex.Load.ty);
5520         /* assert that the B value for the address is already
5521            available (somewhere) */
5522         tl_assert(isIRAtom(e->Iex.Load.addr));
5523         tl_assert(mce->hWordTy == Ity_I32 || mce->hWordTy == Ity_I64);
5524         return gen_load_b( mce, dszB, e->Iex.Load.addr, 0 );
5525      }
5526      case Iex_Mux0X: {
5527         IRAtom* b1 = schemeE( mce, e->Iex.Mux0X.cond );
5528         IRAtom* b2 = schemeE( mce, e->Iex.Mux0X.expr0 );
5529         IRAtom* b3 = schemeE( mce, e->Iex.Mux0X.exprX );
5530         return gen_maxU32( mce, b1, gen_maxU32( mce, b2, b3 ));
5531      }
5532      case Iex_Qop: {
5533         IRAtom* b1 = schemeE( mce, e->Iex.Qop.arg1 );
5534         IRAtom* b2 = schemeE( mce, e->Iex.Qop.arg2 );
5535         IRAtom* b3 = schemeE( mce, e->Iex.Qop.arg3 );
5536         IRAtom* b4 = schemeE( mce, e->Iex.Qop.arg4 );
5537         return gen_maxU32( mce, gen_maxU32( mce, b1, b2 ),
5538                                 gen_maxU32( mce, b3, b4 ) );
5539      }
5540      case Iex_Triop: {
5541         IRAtom* b1 = schemeE( mce, e->Iex.Triop.arg1 );
5542         IRAtom* b2 = schemeE( mce, e->Iex.Triop.arg2 );
5543         IRAtom* b3 = schemeE( mce, e->Iex.Triop.arg3 );
5544         return gen_maxU32( mce, b1, gen_maxU32( mce, b2, b3 ) );
5545      }
5546      case Iex_Binop: {
5547         switch (e->Iex.Binop.op) {
5548            case Iop_CasCmpEQ8:  case Iop_CasCmpNE8:
5549            case Iop_CasCmpEQ16: case Iop_CasCmpNE16:
5550            case Iop_CasCmpEQ32: case Iop_CasCmpNE32:
5551            case Iop_CasCmpEQ64: case Iop_CasCmpNE64:
5552               /* Just say these all produce a defined result,
5553                  regardless of their arguments.  See
5554                  COMMENT_ON_CasCmpEQ in this file. */
5555               return mkU32(0);
5556            default: {
5557               IRAtom* b1 = schemeE( mce, e->Iex.Binop.arg1 );
5558               IRAtom* b2 = schemeE( mce, e->Iex.Binop.arg2 );
5559               return gen_maxU32( mce, b1, b2 );
5560            }
5561         }
5562         tl_assert(0);
5563         /*NOTREACHED*/
5564      }
5565      case Iex_Unop: {
5566         IRAtom* b1 = schemeE( mce, e->Iex.Unop.arg );
5567         return b1;
5568      }
5569      case Iex_Const:
5570         return mkU32(0);
5571      case Iex_RdTmp:
5572         return mkexpr( findShadowTmpB( mce, e->Iex.RdTmp.tmp ));
5573      case Iex_Get: {
5574         Int b_offset = MC_(get_otrack_shadow_offset)(
5575                           e->Iex.Get.offset,
5576                           sizeofIRType(e->Iex.Get.ty)
5577                        );
5578         tl_assert(b_offset >= -1
5579                   && b_offset <= mce->layout->total_sizeB -4);
5580         if (b_offset >= 0) {
5581            /* FIXME: this isn't an atom! */
5582            return IRExpr_Get( b_offset + 2*mce->layout->total_sizeB,
5583                               Ity_I32 );
5584         }
5585         return mkU32(0);
5586      }
5587      default:
5588         VG_(printf)("mc_translate.c: schemeE: unhandled: ");
5589         ppIRExpr(e);
5590         VG_(tool_panic)("memcheck:schemeE");
5591   }
5592}
5593
5594
5595static void do_origins_Dirty ( MCEnv* mce, IRDirty* d )
5596{
5597   // This is a hacked version of do_shadow_Dirty
5598   Int       i, n, toDo, gSz, gOff;
5599   IRAtom    *here, *curr;
5600   IRTemp    dst;
5601
5602   /* First check the guard. */
5603   curr = schemeE( mce, d->guard );
5604
5605   /* Now round up all inputs and maxU32 over them. */
5606
5607   /* Inputs: unmasked args */
5608   for (i = 0; d->args[i]; i++) {
5609      if (d->cee->mcx_mask & (1<<i)) {
5610         /* ignore this arg */
5611      } else {
5612         here = schemeE( mce, d->args[i] );
5613         curr = gen_maxU32( mce, curr, here );
5614      }
5615   }
5616
5617   /* Inputs: guest state that we read. */
5618   for (i = 0; i < d->nFxState; i++) {
5619      tl_assert(d->fxState[i].fx != Ifx_None);
5620      if (d->fxState[i].fx == Ifx_Write)
5621         continue;
5622
5623      /* Ignore any sections marked as 'always defined'. */
5624      if (isAlwaysDefd(mce, d->fxState[i].offset, d->fxState[i].size )) {
5625         if (0)
5626         VG_(printf)("memcheck: Dirty gst: ignored off %d, sz %d\n",
5627                     d->fxState[i].offset, d->fxState[i].size );
5628         continue;
5629      }
5630
5631      /* This state element is read or modified.  So we need to
5632         consider it.  If larger than 4 bytes, deal with it in 4-byte
5633         chunks. */
5634      gSz  = d->fxState[i].size;
5635      gOff = d->fxState[i].offset;
5636      tl_assert(gSz > 0);
5637      while (True) {
5638         Int b_offset;
5639         if (gSz == 0) break;
5640         n = gSz <= 4 ? gSz : 4;
5641         /* update 'curr' with maxU32 of the state slice
5642            gOff .. gOff+n-1 */
5643         b_offset = MC_(get_otrack_shadow_offset)(gOff, 4);
5644         if (b_offset != -1) {
5645            here = assignNew( 'B',mce,
5646                               Ity_I32,
5647                               IRExpr_Get(b_offset + 2*mce->layout->total_sizeB,
5648                                          Ity_I32));
5649            curr = gen_maxU32( mce, curr, here );
5650         }
5651         gSz -= n;
5652         gOff += n;
5653      }
5654
5655   }
5656
5657   /* Inputs: memory */
5658
5659   if (d->mFx != Ifx_None) {
5660      /* Because we may do multiple shadow loads/stores from the same
5661         base address, it's best to do a single test of its
5662         definedness right now.  Post-instrumentation optimisation
5663         should remove all but this test. */
5664      tl_assert(d->mAddr);
5665      here = schemeE( mce, d->mAddr );
5666      curr = gen_maxU32( mce, curr, here );
5667   }
5668
5669   /* Deal with memory inputs (reads or modifies) */
5670   if (d->mFx == Ifx_Read || d->mFx == Ifx_Modify) {
5671      toDo   = d->mSize;
5672      /* chew off 32-bit chunks.  We don't care about the endianness
5673         since it's all going to be condensed down to a single bit,
5674         but nevertheless choose an endianness which is hopefully
5675         native to the platform. */
5676      while (toDo >= 4) {
5677         here = gen_load_b( mce, 4, d->mAddr, d->mSize - toDo );
5678         curr = gen_maxU32( mce, curr, here );
5679         toDo -= 4;
5680      }
5681      /* handle possible 16-bit excess */
5682      while (toDo >= 2) {
5683         here = gen_load_b( mce, 2, d->mAddr, d->mSize - toDo );
5684         curr = gen_maxU32( mce, curr, here );
5685         toDo -= 2;
5686      }
5687      tl_assert(toDo == 0); /* also need to handle 1-byte excess */
5688   }
5689
5690   /* Whew!  So curr is a 32-bit B-value which should give an origin
5691      of some use if any of the inputs to the helper are undefined.
5692      Now we need to re-distribute the results to all destinations. */
5693
5694   /* Outputs: the destination temporary, if there is one. */
5695   if (d->tmp != IRTemp_INVALID) {
5696      dst   = findShadowTmpB(mce, d->tmp);
5697      assign( 'V', mce, dst, curr );
5698   }
5699
5700   /* Outputs: guest state that we write or modify. */
5701   for (i = 0; i < d->nFxState; i++) {
5702      tl_assert(d->fxState[i].fx != Ifx_None);
5703      if (d->fxState[i].fx == Ifx_Read)
5704         continue;
5705
5706      /* Ignore any sections marked as 'always defined'. */
5707      if (isAlwaysDefd(mce, d->fxState[i].offset, d->fxState[i].size ))
5708         continue;
5709
5710      /* This state element is written or modified.  So we need to
5711         consider it.  If larger than 4 bytes, deal with it in 4-byte
5712         chunks. */
5713      gSz  = d->fxState[i].size;
5714      gOff = d->fxState[i].offset;
5715      tl_assert(gSz > 0);
5716      while (True) {
5717         Int b_offset;
5718         if (gSz == 0) break;
5719         n = gSz <= 4 ? gSz : 4;
5720         /* Write 'curr' to the state slice gOff .. gOff+n-1 */
5721         b_offset = MC_(get_otrack_shadow_offset)(gOff, 4);
5722         if (b_offset != -1) {
5723           stmt( 'B', mce, IRStmt_Put(b_offset + 2*mce->layout->total_sizeB,
5724                                      curr ));
5725         }
5726         gSz -= n;
5727         gOff += n;
5728      }
5729   }
5730
5731   /* Outputs: memory that we write or modify.  Same comments about
5732      endianness as above apply. */
5733   if (d->mFx == Ifx_Write || d->mFx == Ifx_Modify) {
5734      toDo   = d->mSize;
5735      /* chew off 32-bit chunks */
5736      while (toDo >= 4) {
5737         gen_store_b( mce, 4, d->mAddr, d->mSize - toDo, curr,
5738                      NULL/*guard*/ );
5739         toDo -= 4;
5740      }
5741      /* handle possible 16-bit excess */
5742      while (toDo >= 2) {
5743        gen_store_b( mce, 2, d->mAddr, d->mSize - toDo, curr,
5744                     NULL/*guard*/ );
5745         toDo -= 2;
5746      }
5747      tl_assert(toDo == 0); /* also need to handle 1-byte excess */
5748   }
5749}
5750
5751
5752static void do_origins_Store ( MCEnv* mce,
5753                               IREndness stEnd,
5754                               IRExpr* stAddr,
5755                               IRExpr* stData )
5756{
5757   Int     dszB;
5758   IRAtom* dataB;
5759   /* assert that the B value for the address is already available
5760      (somewhere), since the call to schemeE will want to see it.
5761      XXXX how does this actually ensure that?? */
5762   tl_assert(isIRAtom(stAddr));
5763   tl_assert(isIRAtom(stData));
5764   dszB  = sizeofIRType( typeOfIRExpr(mce->sb->tyenv, stData ) );
5765   dataB = schemeE( mce, stData );
5766   gen_store_b( mce, dszB, stAddr, 0/*offset*/, dataB,
5767                     NULL/*guard*/ );
5768}
5769
5770
5771static void schemeS ( MCEnv* mce, IRStmt* st )
5772{
5773   tl_assert(MC_(clo_mc_level) == 3);
5774
5775   switch (st->tag) {
5776
5777      case Ist_AbiHint:
5778         /* The value-check instrumenter handles this - by arranging
5779            to pass the address of the next instruction to
5780            MC_(helperc_MAKE_STACK_UNINIT).  This is all that needs to
5781            happen for origin tracking w.r.t. AbiHints.  So there is
5782            nothing to do here. */
5783         break;
5784
5785      case Ist_PutI: {
5786         IRRegArray* descr_b;
5787         IRAtom      *t1, *t2, *t3, *t4;
5788         IRRegArray* descr = st->Ist.PutI.descr;
5789         IRType equivIntTy
5790            = MC_(get_otrack_reg_array_equiv_int_type)(descr);
5791         /* If this array is unshadowable for whatever reason,
5792            generate no code. */
5793         if (equivIntTy == Ity_INVALID)
5794            break;
5795         tl_assert(sizeofIRType(equivIntTy) >= 4);
5796         tl_assert(sizeofIRType(equivIntTy) == sizeofIRType(descr->elemTy));
5797         descr_b
5798            = mkIRRegArray( descr->base + 2*mce->layout->total_sizeB,
5799                            equivIntTy, descr->nElems );
5800         /* Compute a value to Put - the conjoinment of the origin for
5801            the data to be Put-ted (obviously) and of the index value
5802            (not so obviously). */
5803         t1 = schemeE( mce, st->Ist.PutI.data );
5804         t2 = schemeE( mce, st->Ist.PutI.ix );
5805         t3 = gen_maxU32( mce, t1, t2 );
5806         t4 = zWidenFrom32( mce, equivIntTy, t3 );
5807         stmt( 'B', mce, IRStmt_PutI( descr_b, st->Ist.PutI.ix,
5808                                      st->Ist.PutI.bias, t4 ));
5809         break;
5810      }
5811
5812      case Ist_Dirty:
5813         do_origins_Dirty( mce, st->Ist.Dirty.details );
5814         break;
5815
5816      case Ist_Store:
5817         do_origins_Store( mce, st->Ist.Store.end,
5818                                st->Ist.Store.addr,
5819                                st->Ist.Store.data );
5820         break;
5821
5822      case Ist_LLSC: {
5823         /* In short: treat a load-linked like a normal load followed
5824            by an assignment of the loaded (shadow) data the result
5825            temporary.  Treat a store-conditional like a normal store,
5826            and mark the result temporary as defined. */
5827         if (st->Ist.LLSC.storedata == NULL) {
5828            /* Load Linked */
5829            IRType resTy
5830               = typeOfIRTemp(mce->sb->tyenv, st->Ist.LLSC.result);
5831            IRExpr* vanillaLoad
5832               = IRExpr_Load(st->Ist.LLSC.end, resTy, st->Ist.LLSC.addr);
5833            tl_assert(resTy == Ity_I64 || resTy == Ity_I32
5834                      || resTy == Ity_I16 || resTy == Ity_I8);
5835            assign( 'B', mce, findShadowTmpB(mce, st->Ist.LLSC.result),
5836                              schemeE(mce, vanillaLoad));
5837         } else {
5838            /* Store conditional */
5839            do_origins_Store( mce, st->Ist.LLSC.end,
5840                                   st->Ist.LLSC.addr,
5841                                   st->Ist.LLSC.storedata );
5842            /* For the rationale behind this, see comments at the
5843               place where the V-shadow for .result is constructed, in
5844               do_shadow_LLSC.  In short, we regard .result as
5845               always-defined. */
5846            assign( 'B', mce, findShadowTmpB(mce, st->Ist.LLSC.result),
5847                              mkU32(0) );
5848         }
5849         break;
5850      }
5851
5852      case Ist_Put: {
5853         Int b_offset
5854            = MC_(get_otrack_shadow_offset)(
5855                 st->Ist.Put.offset,
5856                 sizeofIRType(typeOfIRExpr(mce->sb->tyenv, st->Ist.Put.data))
5857              );
5858         if (b_offset >= 0) {
5859            /* FIXME: this isn't an atom! */
5860            stmt( 'B', mce, IRStmt_Put(b_offset + 2*mce->layout->total_sizeB,
5861                                       schemeE( mce, st->Ist.Put.data )) );
5862         }
5863         break;
5864      }
5865
5866      case Ist_WrTmp:
5867         assign( 'B', mce, findShadowTmpB(mce, st->Ist.WrTmp.tmp),
5868                           schemeE(mce, st->Ist.WrTmp.data) );
5869         break;
5870
5871      case Ist_MBE:
5872      case Ist_NoOp:
5873      case Ist_Exit:
5874      case Ist_IMark:
5875         break;
5876
5877      default:
5878         VG_(printf)("mc_translate.c: schemeS: unhandled: ");
5879         ppIRStmt(st);
5880         VG_(tool_panic)("memcheck:schemeS");
5881   }
5882}
5883
5884
5885/*--------------------------------------------------------------------*/
5886/*--- end                                           mc_translate.c ---*/
5887/*--------------------------------------------------------------------*/
5888