mc_translate.c revision f517634b4a879b7653efa40d60c62fa3419809ed
1
2/*--------------------------------------------------------------------*/
3/*--- Instrument IR to perform memory checking operations.         ---*/
4/*---                                               mc_translate.c ---*/
5/*--------------------------------------------------------------------*/
6
7/*
8   This file is part of MemCheck, a heavyweight Valgrind tool for
9   detecting memory errors.
10
11   Copyright (C) 2000-2012 Julian Seward
12      jseward@acm.org
13
14   This program is free software; you can redistribute it and/or
15   modify it under the terms of the GNU General Public License as
16   published by the Free Software Foundation; either version 2 of the
17   License, or (at your option) any later version.
18
19   This program is distributed in the hope that it will be useful, but
20   WITHOUT ANY WARRANTY; without even the implied warranty of
21   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
22   General Public License for more details.
23
24   You should have received a copy of the GNU General Public License
25   along with this program; if not, write to the Free Software
26   Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA
27   02111-1307, USA.
28
29   The GNU General Public License is contained in the file COPYING.
30*/
31
32#include "pub_tool_basics.h"
33#include "pub_tool_poolalloc.h"     // For mc_include.h
34#include "pub_tool_hashtable.h"     // For mc_include.h
35#include "pub_tool_libcassert.h"
36#include "pub_tool_libcprint.h"
37#include "pub_tool_tooliface.h"
38#include "pub_tool_machine.h"     // VG_(fnptr_to_fnentry)
39#include "pub_tool_xarray.h"
40#include "pub_tool_mallocfree.h"
41#include "pub_tool_libcbase.h"
42
43#include "mc_include.h"
44
45
46/* FIXMEs JRS 2011-June-16.
47
48   Check the interpretation for vector narrowing and widening ops,
49   particularly the saturating ones.  I suspect they are overly
50   pessimistic and/or wrong.
51*/
52
53/* This file implements the Memcheck instrumentation, and in
54   particular contains the core of its undefined value detection
55   machinery.  For a comprehensive background of the terminology,
56   algorithms and rationale used herein, read:
57
58     Using Valgrind to detect undefined value errors with
59     bit-precision
60
61     Julian Seward and Nicholas Nethercote
62
63     2005 USENIX Annual Technical Conference (General Track),
64     Anaheim, CA, USA, April 10-15, 2005.
65
66   ----
67
68   Here is as good a place as any to record exactly when V bits are and
69   should be checked, why, and what function is responsible.
70
71
72   Memcheck complains when an undefined value is used:
73
74   1. In the condition of a conditional branch.  Because it could cause
75      incorrect control flow, and thus cause incorrect externally-visible
76      behaviour.  [mc_translate.c:complainIfUndefined]
77
78   2. As an argument to a system call, or as the value that specifies
79      the system call number.  Because it could cause an incorrect
80      externally-visible side effect.  [mc_translate.c:mc_pre_reg_read]
81
82   3. As the address in a load or store.  Because it could cause an
83      incorrect value to be used later, which could cause externally-visible
84      behaviour (eg. via incorrect control flow or an incorrect system call
85      argument)  [complainIfUndefined]
86
87   4. As the target address of a branch.  Because it could cause incorrect
88      control flow.  [complainIfUndefined]
89
90   5. As an argument to setenv, unsetenv, or putenv.  Because it could put
91      an incorrect value into the external environment.
92      [mc_replace_strmem.c:VG_WRAP_FUNCTION_ZU(*, *env)]
93
94   6. As the index in a GETI or PUTI operation.  I'm not sure why... (njn).
95      [complainIfUndefined]
96
97   7. As an argument to the VALGRIND_CHECK_MEM_IS_DEFINED and
98      VALGRIND_CHECK_VALUE_IS_DEFINED client requests.  Because the user
99      requested it.  [in memcheck.h]
100
101
102   Memcheck also complains, but should not, when an undefined value is used:
103
104   8. As the shift value in certain SIMD shift operations (but not in the
105      standard integer shift operations).  This inconsistency is due to
106      historical reasons.  [complainIfUndefined]
107
108
109   Memcheck does not complain, but should, when an undefined value is used:
110
111   9. As an input to a client request.  Because the client request may
112      affect the visible behaviour -- see bug #144362 for an example
113      involving the malloc replacements in vg_replace_malloc.c and
114      VALGRIND_NON_SIMD_CALL* requests, where an uninitialised argument
115      isn't identified.  That bug report also has some info on how to solve
116      the problem.  [valgrind.h:VALGRIND_DO_CLIENT_REQUEST]
117
118
119   In practice, 1 and 2 account for the vast majority of cases.
120*/
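
/* As a concrete illustration of case 1 above, a user program like

      int x;             -- never initialised
      if (x > 10)        -- branch depends on undefined bits
         printf("big\n");

   causes complainIfUndefined to be invoked on the V bits of the atom
   feeding the branch condition.  (Illustrative only; the exact IR
   produced depends on the front end.)
*/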
121
122/*------------------------------------------------------------*/
123/*--- Forward decls                                        ---*/
124/*------------------------------------------------------------*/
125
126struct _MCEnv;
127
128static IRType  shadowTypeV ( IRType ty );
129static IRExpr* expr2vbits ( struct _MCEnv* mce, IRExpr* e );
130static IRTemp  findShadowTmpB ( struct _MCEnv* mce, IRTemp orig );
131
132static IRExpr *i128_const_zero(void);
133
134/*------------------------------------------------------------*/
135/*--- Memcheck running state, and tmp management.          ---*/
136/*------------------------------------------------------------*/
137
138/* Carries info about a particular tmp.  The tmp's number is not
139   recorded, as this is implied by (equal to) its index in the tmpMap
140   in MCEnv.  The tmp's type is also not recorded, as this is present
141   in MCEnv.sb->tyenv.
142
143   When .kind is Orig, .shadowV and .shadowB may give the identities
144   of the temps currently holding the associated definedness (shadowV)
145   and origin (shadowB) values, or these may be IRTemp_INVALID if code
146   to compute such values has not yet been emitted.
147
148   When .kind is VSh or BSh then the tmp holds a V- or B- value,
149   and so .shadowV and .shadowB must be IRTemp_INVALID, since it is
150   illogical for a shadow tmp itself to be shadowed.
151*/
152typedef
153   enum { Orig=1, VSh=2, BSh=3 }
154   TempKind;
155
156typedef
157   struct {
158      TempKind kind;
159      IRTemp   shadowV;
160      IRTemp   shadowB;
161   }
162   TempMapEnt;
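
/* A hypothetical snapshot, to make the above concrete: suppose
   original tmp t2 has been given a V-shadow but no B-shadow yet, and
   t7 is that V-shadow.  Then tmpMap might contain (other entries are
   similar):

      index 2 : { kind=Orig, shadowV=t7,             shadowB=IRTemp_INVALID }
      index 7 : { kind=VSh,  shadowV=IRTemp_INVALID, shadowB=IRTemp_INVALID }

   Note the shadow entry (index 7) carries no shadows of its own, as
   required.
*/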
163
164
165/* Carries around state during memcheck instrumentation. */
166typedef
167   struct _MCEnv {
168      /* MODIFIED: the superblock being constructed.  IRStmts are
169         added. */
170      IRSB* sb;
171      Bool  trace;
172
173      /* MODIFIED: a table [0 .. #temps_in_sb-1] which gives the
174         current kind and possibly shadow temps for each temp in the
175         IRSB being constructed.  Note that it does not contain the
176         type of each tmp.  If you want to know the type, look at the
177         relevant entry in sb->tyenv.  It follows that at all times
178         during the instrumentation process, the valid indices for
179         tmpMap and sb->tyenv are identical, being 0 .. N-1 where N is
180         the total number of Orig, V- and B- temps allocated so far.
181
182         The reason for this strange split (types in one place, all
183         other info in another) is that we need the types to be
184         attached to sb so as to make it possible to do
185         "typeOfIRExpr(mce->bb->tyenv, ...)" at various places in the
186         instrumentation process. */
187      XArray* /* of TempMapEnt */ tmpMap;
188
189      /* MODIFIED: indicates whether "bogus" literals have so far been
190         found.  Starts off False, and may change to True. */
191      Bool bogusLiterals;
192
193      /* READONLY: indicates whether we should use expensive
194         interpretations of integer adds, since unfortunately LLVM
195         uses them to do ORs in some circumstances.  Defaulted to True
196         on MacOS and False everywhere else. */
197      Bool useLLVMworkarounds;
198
199      /* READONLY: the guest layout.  This indicates which parts of
200         the guest state should be regarded as 'always defined'. */
201      VexGuestLayout* layout;
202
203      /* READONLY: the host word type.  Needed for constructing
204         arguments of type 'HWord' to be passed to helper functions.
205         Ity_I32 or Ity_I64 only. */
206      IRType hWordTy;
207   }
208   MCEnv;
209
210/* SHADOW TMP MANAGEMENT.  Shadow tmps are allocated lazily (on
211   demand), as they are encountered.  This is for two reasons.
212
213   (1) (less important reason): Many original tmps are unused due to
214   initial IR optimisation, and we do not want to waste space in tables
215   tracking them.
216
217   Shadow IRTemps are therefore allocated on demand.  mce.tmpMap is a
218   table indexed [0 .. #temps_in_sb-1], which gives the current shadow for
219   each original tmp, or IRTemp_INVALID if none is so far assigned.
220   It is necessary to support making multiple assignments to a shadow
221   -- specifically, after testing a shadow for definedness, it needs
222   to be made defined.  But IR's SSA property disallows this.
223
224   (2) (more important reason): Therefore, when a shadow needs to get
225   a new value, a new temporary is created, the value is assigned to
226   that, and the tmpMap is updated to reflect the new binding.
227
228   A corollary is that if the tmpMap maps a given tmp to
229   IRTemp_INVALID and we are hoping to read that shadow tmp, it means
230   there's a read-before-write error in the original tmps.  The IR
231   sanity checker should catch all such anomalies, however.
232*/
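
/* To make the multiple-assignment point concrete: once a shadow has
   been tested for definedness it must be forced to 'defined', and
   because of SSA that requires a fresh shadow tmp rather than a second
   assignment to the old one.  The idiom (used by complainIfUndefined
   below) is, in sketch form:

      newShadowTmpV(mce, t);                      -- bind a fresh shadow to t
      assign('V', mce, findShadowTmpV(mce, t),    -- and give it an
             definedOfType(ty));                  -- all-defined value
*/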
233
234/* Create a new IRTemp of type 'ty' and kind 'kind', and add it to
235   both the table in mce->sb and to our auxiliary mapping.  Note that
236   newTemp may cause mce->tmpMap to resize, hence previous results
237   from VG_(indexXA)(mce->tmpMap) are invalidated. */
238static IRTemp newTemp ( MCEnv* mce, IRType ty, TempKind kind )
239{
240   Word       newIx;
241   TempMapEnt ent;
242   IRTemp     tmp = newIRTemp(mce->sb->tyenv, ty);
243   ent.kind    = kind;
244   ent.shadowV = IRTemp_INVALID;
245   ent.shadowB = IRTemp_INVALID;
246   newIx = VG_(addToXA)( mce->tmpMap, &ent );
247   tl_assert(newIx == (Word)tmp);
248   return tmp;
249}
250
251
252/* Find the tmp currently shadowing the given original tmp.  If none
253   so far exists, allocate one.  */
254static IRTemp findShadowTmpV ( MCEnv* mce, IRTemp orig )
255{
256   TempMapEnt* ent;
257   /* VG_(indexXA) range-checks 'orig', hence no need to check
258      here. */
259   ent = (TempMapEnt*)VG_(indexXA)( mce->tmpMap, (Word)orig );
260   tl_assert(ent->kind == Orig);
261   if (ent->shadowV == IRTemp_INVALID) {
262      IRTemp tmpV
263        = newTemp( mce, shadowTypeV(mce->sb->tyenv->types[orig]), VSh );
264      /* newTemp may cause mce->tmpMap to resize, hence previous results
265         from VG_(indexXA) are invalid. */
266      ent = (TempMapEnt*)VG_(indexXA)( mce->tmpMap, (Word)orig );
267      tl_assert(ent->kind == Orig);
268      tl_assert(ent->shadowV == IRTemp_INVALID);
269      ent->shadowV = tmpV;
270   }
271   return ent->shadowV;
272}
273
274/* Allocate a new shadow for the given original tmp.  This means any
275   previous shadow is abandoned.  This is needed because it is
276   necessary to give a new value to a shadow once it has been tested
277   for undefinedness, but unfortunately IR's SSA property disallows
278   this.  Instead we must abandon the old shadow, allocate a new one
279   and use that instead.
280
281   This is the same as findShadowTmpV, except we don't bother to see
282   if a shadow temp already existed -- we simply allocate a new one
283   regardless. */
284static void newShadowTmpV ( MCEnv* mce, IRTemp orig )
285{
286   TempMapEnt* ent;
287   /* VG_(indexXA) range-checks 'orig', hence no need to check
288      here. */
289   ent = (TempMapEnt*)VG_(indexXA)( mce->tmpMap, (Word)orig );
290   tl_assert(ent->kind == Orig);
291   if (1) {
292      IRTemp tmpV
293        = newTemp( mce, shadowTypeV(mce->sb->tyenv->types[orig]), VSh );
294      /* newTemp may cause mce->tmpMap to resize, hence previous results
295         from VG_(indexXA) are invalid. */
296      ent = (TempMapEnt*)VG_(indexXA)( mce->tmpMap, (Word)orig );
297      tl_assert(ent->kind == Orig);
298      ent->shadowV = tmpV;
299   }
300}
301
302
303/*------------------------------------------------------------*/
304/*--- IRAtoms -- a subset of IRExprs                       ---*/
305/*------------------------------------------------------------*/
306
307/* An atom is either an IRExpr_Const or an IRExpr_Tmp, as defined by
308   isIRAtom() in libvex_ir.h.  Because this instrumenter expects flat
309   input, most of this code deals in atoms.  Usefully, a value atom
310   always has a V-value which is also an atom: constants are shadowed
311   by constants, and temps are shadowed by the corresponding shadow
312   temporary. */
313
314typedef  IRExpr  IRAtom;
315
316/* (used for sanity checks only): is this an atom which looks
317   like it's from original code? */
318static Bool isOriginalAtom ( MCEnv* mce, IRAtom* a1 )
319{
320   if (a1->tag == Iex_Const)
321      return True;
322   if (a1->tag == Iex_RdTmp) {
323      TempMapEnt* ent = VG_(indexXA)( mce->tmpMap, a1->Iex.RdTmp.tmp );
324      return ent->kind == Orig;
325   }
326   return False;
327}
328
329/* (used for sanity checks only): is this an atom which looks
330   like it's from shadow code? */
331static Bool isShadowAtom ( MCEnv* mce, IRAtom* a1 )
332{
333   if (a1->tag == Iex_Const)
334      return True;
335   if (a1->tag == Iex_RdTmp) {
336      TempMapEnt* ent = VG_(indexXA)( mce->tmpMap, a1->Iex.RdTmp.tmp );
337      return ent->kind == VSh || ent->kind == BSh;
338   }
339   return False;
340}
341
342/* (used for sanity checks only): check that both args are atoms and
343   are identically-kinded. */
344static Bool sameKindedAtoms ( IRAtom* a1, IRAtom* a2 )
345{
346   if (a1->tag == Iex_RdTmp && a2->tag == Iex_RdTmp)
347      return True;
348   if (a1->tag == Iex_Const && a2->tag == Iex_Const)
349      return True;
350   return False;
351}
352
353
354/*------------------------------------------------------------*/
355/*--- Type management                                      ---*/
356/*------------------------------------------------------------*/
357
358/* Shadow state is always accessed using integer types.  This returns
359   an integer type with the same size (as per sizeofIRType) as the
360   given type.  The only valid shadow types are Bit, I8, I16, I32,
361   I64, I128, V128, V256. */
362
363static IRType shadowTypeV ( IRType ty )
364{
365   switch (ty) {
366      case Ity_I1:
367      case Ity_I8:
368      case Ity_I16:
369      case Ity_I32:
370      case Ity_I64:
371      case Ity_I128: return ty;
372      case Ity_F32:  return Ity_I32;
373      case Ity_D32:  return Ity_I32;
374      case Ity_F64:  return Ity_I64;
375      case Ity_D64:  return Ity_I64;
376      case Ity_F128: return Ity_I128;
377      case Ity_D128: return Ity_I128;
378      case Ity_V128: return Ity_V128;
379      case Ity_V256: return Ity_V256;
380      default: ppIRType(ty);
381               VG_(tool_panic)("memcheck:shadowTypeV");
382   }
383}
384
385/* Produce a 'defined' value of the given shadow type.  Should only be
386   supplied shadow types (Bit/I8/I16/I32/I64/I128/V128). */
387static IRExpr* definedOfType ( IRType ty ) {
388   switch (ty) {
389      case Ity_I1:   return IRExpr_Const(IRConst_U1(False));
390      case Ity_I8:   return IRExpr_Const(IRConst_U8(0));
391      case Ity_I16:  return IRExpr_Const(IRConst_U16(0));
392      case Ity_I32:  return IRExpr_Const(IRConst_U32(0));
393      case Ity_I64:  return IRExpr_Const(IRConst_U64(0));
394      case Ity_I128: return i128_const_zero();
395      case Ity_V128: return IRExpr_Const(IRConst_V128(0x0000));
396      default:       VG_(tool_panic)("memcheck:definedOfType");
397   }
398}
399
400
401/*------------------------------------------------------------*/
402/*--- Constructing IR fragments                            ---*/
403/*------------------------------------------------------------*/
404
405/* add stmt to a bb */
406static inline void stmt ( HChar cat, MCEnv* mce, IRStmt* st ) {
407   if (mce->trace) {
408      VG_(printf)("  %c: ", cat);
409      ppIRStmt(st);
410      VG_(printf)("\n");
411   }
412   addStmtToIRSB(mce->sb, st);
413}
414
415/* assign value to tmp */
416static inline
417void assign ( HChar cat, MCEnv* mce, IRTemp tmp, IRExpr* expr ) {
418   stmt(cat, mce, IRStmt_WrTmp(tmp,expr));
419}
420
421/* build various kinds of expressions */
422#define triop(_op, _arg1, _arg2, _arg3) \
423                                 IRExpr_Triop((_op),(_arg1),(_arg2),(_arg3))
424#define binop(_op, _arg1, _arg2) IRExpr_Binop((_op),(_arg1),(_arg2))
425#define unop(_op, _arg)          IRExpr_Unop((_op),(_arg))
426#define mkU8(_n)                 IRExpr_Const(IRConst_U8(_n))
427#define mkU16(_n)                IRExpr_Const(IRConst_U16(_n))
428#define mkU32(_n)                IRExpr_Const(IRConst_U32(_n))
429#define mkU64(_n)                IRExpr_Const(IRConst_U64(_n))
430#define mkV128(_n)               IRExpr_Const(IRConst_V128(_n))
431#define mkexpr(_tmp)             IRExpr_RdTmp((_tmp))
432
433/* Bind the given expression to a new temporary, and return the
434   temporary.  This effectively converts an arbitrary expression into
435   an atom.
436
437   'ty' is the type of 'e' and hence the type that the new temporary
438   needs to be.  But passing it in is redundant, since we can deduce
439   the type merely by inspecting 'e'.  So at least use that fact to
440   assert that the two types agree. */
441static IRAtom* assignNew ( HChar cat, MCEnv* mce, IRType ty, IRExpr* e )
442{
443   TempKind k;
444   IRTemp   t;
445   IRType   tyE = typeOfIRExpr(mce->sb->tyenv, e);
446
447   tl_assert(tyE == ty); /* so 'ty' is redundant (!) */
448   switch (cat) {
449      case 'V': k = VSh;  break;
450      case 'B': k = BSh;  break;
451      case 'C': k = Orig; break;
452                /* happens when we are making up new "orig"
453                   expressions, for IRCAS handling */
454      default: tl_assert(0);
455   }
456   t = newTemp(mce, ty, k);
457   assign(cat, mce, t, e);
458   return mkexpr(t);
459}
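
/* Typical use: turn a non-atomic V-bit expression into an atom, e.g.
   (as mkUifU32 below does)

      IRAtom* vboth = assignNew('V', mce, Ity_I32, binop(Iop_Or32, va1, va2));

   so that later code can treat 'vboth' as a plain shadow atom.
*/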
460
461
462/*------------------------------------------------------------*/
463/*--- Helper functions for 128-bit ops                     ---*/
464/*------------------------------------------------------------*/
465
466static IRExpr *i128_const_zero(void)
467{
468   IRAtom* z64 = IRExpr_Const(IRConst_U64(0));
469   return binop(Iop_64HLto128, z64, z64);
470}
471
472/* There are no I128 loads and/or stores [as generated by any
473   current front ends].  So we do not need to worry about that in
474   expr2vbits_Load */
475
476
477/*------------------------------------------------------------*/
478/*--- Constructing definedness primitive ops               ---*/
479/*------------------------------------------------------------*/
480
481/* --------- Defined-if-either-defined --------- */
482
483static IRAtom* mkDifD8 ( MCEnv* mce, IRAtom* a1, IRAtom* a2 ) {
484   tl_assert(isShadowAtom(mce,a1));
485   tl_assert(isShadowAtom(mce,a2));
486   return assignNew('V', mce, Ity_I8, binop(Iop_And8, a1, a2));
487}
488
489static IRAtom* mkDifD16 ( MCEnv* mce, IRAtom* a1, IRAtom* a2 ) {
490   tl_assert(isShadowAtom(mce,a1));
491   tl_assert(isShadowAtom(mce,a2));
492   return assignNew('V', mce, Ity_I16, binop(Iop_And16, a1, a2));
493}
494
495static IRAtom* mkDifD32 ( MCEnv* mce, IRAtom* a1, IRAtom* a2 ) {
496   tl_assert(isShadowAtom(mce,a1));
497   tl_assert(isShadowAtom(mce,a2));
498   return assignNew('V', mce, Ity_I32, binop(Iop_And32, a1, a2));
499}
500
501static IRAtom* mkDifD64 ( MCEnv* mce, IRAtom* a1, IRAtom* a2 ) {
502   tl_assert(isShadowAtom(mce,a1));
503   tl_assert(isShadowAtom(mce,a2));
504   return assignNew('V', mce, Ity_I64, binop(Iop_And64, a1, a2));
505}
506
507static IRAtom* mkDifDV128 ( MCEnv* mce, IRAtom* a1, IRAtom* a2 ) {
508   tl_assert(isShadowAtom(mce,a1));
509   tl_assert(isShadowAtom(mce,a2));
510   return assignNew('V', mce, Ity_V128, binop(Iop_AndV128, a1, a2));
511}
512
513static IRAtom* mkDifDV256 ( MCEnv* mce, IRAtom* a1, IRAtom* a2 ) {
514   tl_assert(isShadowAtom(mce,a1));
515   tl_assert(isShadowAtom(mce,a2));
516   return assignNew('V', mce, Ity_V256, binop(Iop_AndV256, a1, a2));
517}
518
519/* --------- Undefined-if-either-undefined --------- */
520
521static IRAtom* mkUifU8 ( MCEnv* mce, IRAtom* a1, IRAtom* a2 ) {
522   tl_assert(isShadowAtom(mce,a1));
523   tl_assert(isShadowAtom(mce,a2));
524   return assignNew('V', mce, Ity_I8, binop(Iop_Or8, a1, a2));
525}
526
527static IRAtom* mkUifU16 ( MCEnv* mce, IRAtom* a1, IRAtom* a2 ) {
528   tl_assert(isShadowAtom(mce,a1));
529   tl_assert(isShadowAtom(mce,a2));
530   return assignNew('V', mce, Ity_I16, binop(Iop_Or16, a1, a2));
531}
532
533static IRAtom* mkUifU32 ( MCEnv* mce, IRAtom* a1, IRAtom* a2 ) {
534   tl_assert(isShadowAtom(mce,a1));
535   tl_assert(isShadowAtom(mce,a2));
536   return assignNew('V', mce, Ity_I32, binop(Iop_Or32, a1, a2));
537}
538
539static IRAtom* mkUifU64 ( MCEnv* mce, IRAtom* a1, IRAtom* a2 ) {
540   tl_assert(isShadowAtom(mce,a1));
541   tl_assert(isShadowAtom(mce,a2));
542   return assignNew('V', mce, Ity_I64, binop(Iop_Or64, a1, a2));
543}
544
545static IRAtom* mkUifU128 ( MCEnv* mce, IRAtom* a1, IRAtom* a2 ) {
546   IRAtom *tmp1, *tmp2, *tmp3, *tmp4, *tmp5, *tmp6;
547   tl_assert(isShadowAtom(mce,a1));
548   tl_assert(isShadowAtom(mce,a2));
549   tmp1 = assignNew('V', mce, Ity_I64, unop(Iop_128to64, a1));
550   tmp2 = assignNew('V', mce, Ity_I64, unop(Iop_128HIto64, a1));
551   tmp3 = assignNew('V', mce, Ity_I64, unop(Iop_128to64, a2));
552   tmp4 = assignNew('V', mce, Ity_I64, unop(Iop_128HIto64, a2));
553   tmp5 = assignNew('V', mce, Ity_I64, binop(Iop_Or64, tmp1, tmp3));
554   tmp6 = assignNew('V', mce, Ity_I64, binop(Iop_Or64, tmp2, tmp4));
555
556   return assignNew('V', mce, Ity_I128, binop(Iop_64HLto128, tmp6, tmp5));
557}
558
559static IRAtom* mkUifUV128 ( MCEnv* mce, IRAtom* a1, IRAtom* a2 ) {
560   tl_assert(isShadowAtom(mce,a1));
561   tl_assert(isShadowAtom(mce,a2));
562   return assignNew('V', mce, Ity_V128, binop(Iop_OrV128, a1, a2));
563}
564
565static IRAtom* mkUifUV256 ( MCEnv* mce, IRAtom* a1, IRAtom* a2 ) {
566   tl_assert(isShadowAtom(mce,a1));
567   tl_assert(isShadowAtom(mce,a2));
568   return assignNew('V', mce, Ity_V256, binop(Iop_OrV256, a1, a2));
569}
570
571static IRAtom* mkUifU ( MCEnv* mce, IRType vty, IRAtom* a1, IRAtom* a2 ) {
572   switch (vty) {
573      case Ity_I8:   return mkUifU8(mce, a1, a2);
574      case Ity_I16:  return mkUifU16(mce, a1, a2);
575      case Ity_I32:  return mkUifU32(mce, a1, a2);
576      case Ity_I64:  return mkUifU64(mce, a1, a2);
577      case Ity_I128: return mkUifU128(mce, a1, a2);
578      case Ity_V128: return mkUifUV128(mce, a1, a2);
579      default:
580         VG_(printf)("\n"); ppIRType(vty); VG_(printf)("\n");
581         VG_(tool_panic)("memcheck:mkUifU");
582   }
583}
584
585/* --------- The Left-family of operations. --------- */
586
587static IRAtom* mkLeft8 ( MCEnv* mce, IRAtom* a1 ) {
588   tl_assert(isShadowAtom(mce,a1));
589   return assignNew('V', mce, Ity_I8, unop(Iop_Left8, a1));
590}
591
592static IRAtom* mkLeft16 ( MCEnv* mce, IRAtom* a1 ) {
593   tl_assert(isShadowAtom(mce,a1));
594   return assignNew('V', mce, Ity_I16, unop(Iop_Left16, a1));
595}
596
597static IRAtom* mkLeft32 ( MCEnv* mce, IRAtom* a1 ) {
598   tl_assert(isShadowAtom(mce,a1));
599   return assignNew('V', mce, Ity_I32, unop(Iop_Left32, a1));
600}
601
602static IRAtom* mkLeft64 ( MCEnv* mce, IRAtom* a1 ) {
603   tl_assert(isShadowAtom(mce,a1));
604   return assignNew('V', mce, Ity_I64, unop(Iop_Left64, a1));
605}
606
607/* --------- 'Improvement' functions for AND/OR. --------- */
608
609/* ImproveAND(data, vbits) = data OR vbits.  Defined (0) data 0s give
610   defined (0); all other -> undefined (1).
611*/
612static IRAtom* mkImproveAND8 ( MCEnv* mce, IRAtom* data, IRAtom* vbits )
613{
614   tl_assert(isOriginalAtom(mce, data));
615   tl_assert(isShadowAtom(mce, vbits));
616   tl_assert(sameKindedAtoms(data, vbits));
617   return assignNew('V', mce, Ity_I8, binop(Iop_Or8, data, vbits));
618}
619
620static IRAtom* mkImproveAND16 ( MCEnv* mce, IRAtom* data, IRAtom* vbits )
621{
622   tl_assert(isOriginalAtom(mce, data));
623   tl_assert(isShadowAtom(mce, vbits));
624   tl_assert(sameKindedAtoms(data, vbits));
625   return assignNew('V', mce, Ity_I16, binop(Iop_Or16, data, vbits));
626}
627
628static IRAtom* mkImproveAND32 ( MCEnv* mce, IRAtom* data, IRAtom* vbits )
629{
630   tl_assert(isOriginalAtom(mce, data));
631   tl_assert(isShadowAtom(mce, vbits));
632   tl_assert(sameKindedAtoms(data, vbits));
633   return assignNew('V', mce, Ity_I32, binop(Iop_Or32, data, vbits));
634}
635
636static IRAtom* mkImproveAND64 ( MCEnv* mce, IRAtom* data, IRAtom* vbits )
637{
638   tl_assert(isOriginalAtom(mce, data));
639   tl_assert(isShadowAtom(mce, vbits));
640   tl_assert(sameKindedAtoms(data, vbits));
641   return assignNew('V', mce, Ity_I64, binop(Iop_Or64, data, vbits));
642}
643
644static IRAtom* mkImproveANDV128 ( MCEnv* mce, IRAtom* data, IRAtom* vbits )
645{
646   tl_assert(isOriginalAtom(mce, data));
647   tl_assert(isShadowAtom(mce, vbits));
648   tl_assert(sameKindedAtoms(data, vbits));
649   return assignNew('V', mce, Ity_V128, binop(Iop_OrV128, data, vbits));
650}
651
652static IRAtom* mkImproveANDV256 ( MCEnv* mce, IRAtom* data, IRAtom* vbits )
653{
654   tl_assert(isOriginalAtom(mce, data));
655   tl_assert(isShadowAtom(mce, vbits));
656   tl_assert(sameKindedAtoms(data, vbits));
657   return assignNew('V', mce, Ity_V256, binop(Iop_OrV256, data, vbits));
658}
659
660/* ImproveOR(data, vbits) = ~data OR vbits.  Defined (0) data 1s give
661   defined (0); all other -> undefined (1).
662*/
663static IRAtom* mkImproveOR8 ( MCEnv* mce, IRAtom* data, IRAtom* vbits )
664{
665   tl_assert(isOriginalAtom(mce, data));
666   tl_assert(isShadowAtom(mce, vbits));
667   tl_assert(sameKindedAtoms(data, vbits));
668   return assignNew(
669             'V', mce, Ity_I8,
670             binop(Iop_Or8,
671                   assignNew('V', mce, Ity_I8, unop(Iop_Not8, data)),
672                   vbits) );
673}
674
675static IRAtom* mkImproveOR16 ( MCEnv* mce, IRAtom* data, IRAtom* vbits )
676{
677   tl_assert(isOriginalAtom(mce, data));
678   tl_assert(isShadowAtom(mce, vbits));
679   tl_assert(sameKindedAtoms(data, vbits));
680   return assignNew(
681             'V', mce, Ity_I16,
682             binop(Iop_Or16,
683                   assignNew('V', mce, Ity_I16, unop(Iop_Not16, data)),
684                   vbits) );
685}
686
687static IRAtom* mkImproveOR32 ( MCEnv* mce, IRAtom* data, IRAtom* vbits )
688{
689   tl_assert(isOriginalAtom(mce, data));
690   tl_assert(isShadowAtom(mce, vbits));
691   tl_assert(sameKindedAtoms(data, vbits));
692   return assignNew(
693             'V', mce, Ity_I32,
694             binop(Iop_Or32,
695                   assignNew('V', mce, Ity_I32, unop(Iop_Not32, data)),
696                   vbits) );
697}
698
699static IRAtom* mkImproveOR64 ( MCEnv* mce, IRAtom* data, IRAtom* vbits )
700{
701   tl_assert(isOriginalAtom(mce, data));
702   tl_assert(isShadowAtom(mce, vbits));
703   tl_assert(sameKindedAtoms(data, vbits));
704   return assignNew(
705             'V', mce, Ity_I64,
706             binop(Iop_Or64,
707                   assignNew('V', mce, Ity_I64, unop(Iop_Not64, data)),
708                   vbits) );
709}
710
711static IRAtom* mkImproveORV128 ( MCEnv* mce, IRAtom* data, IRAtom* vbits )
712{
713   tl_assert(isOriginalAtom(mce, data));
714   tl_assert(isShadowAtom(mce, vbits));
715   tl_assert(sameKindedAtoms(data, vbits));
716   return assignNew(
717             'V', mce, Ity_V128,
718             binop(Iop_OrV128,
719                   assignNew('V', mce, Ity_V128, unop(Iop_NotV128, data)),
720                   vbits) );
721}
722
723static IRAtom* mkImproveORV256 ( MCEnv* mce, IRAtom* data, IRAtom* vbits )
724{
725   tl_assert(isOriginalAtom(mce, data));
726   tl_assert(isShadowAtom(mce, vbits));
727   tl_assert(sameKindedAtoms(data, vbits));
728   return assignNew(
729             'V', mce, Ity_V256,
730             binop(Iop_OrV256,
731                   assignNew('V', mce, Ity_V256, unop(Iop_NotV256, data)),
732                   vbits) );
733}
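
/* Worked example of why the improvement terms help (8-bit, 0=defined):
   let x = 0x00 be fully defined (vx = 0x00) and let y be fully
   undefined (vy = 0xFF).  Then x AND y is certainly 0x00, so its V
   bits should be 0x00, yet the naive UifU(vx,vy) says 0xFF.  The
   improvement term ImproveAND8(x,vx) = x|vx = 0x00, and DifD'ing it
   onto the naive result (the combination used for And/Or elsewhere in
   this file) gives 0x00, i.e. fully defined.  Symmetrically, for OR
   with x = 0xFF fully defined, ImproveOR8(x,vx) = ~x|vx = 0x00
   rescues the result. */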
734
735/* --------- Pessimising casts. --------- */
736
737/* The function returns an expression of type DST_TY. If any of the VBITS
738   is undefined (value == 1) the resulting expression has all bits set to
739   1. Otherwise, all bits are 0. */
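
/* For example, PCast-ing an I8 shadow to I32:

      vbits8 = 0x04 (one undefined bit)  -->  0xFFFFFFFF (all undefined)
      vbits8 = 0x00 (fully defined)      -->  0x00000000 (fully defined)

   i.e. a single undefined input bit pessimistically poisons the whole
   result. */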
740
741static IRAtom* mkPCastTo( MCEnv* mce, IRType dst_ty, IRAtom* vbits )
742{
743   IRType  src_ty;
744   IRAtom* tmp1;
745
746   /* Note, dst_ty is a shadow type, not an original type. */
747   tl_assert(isShadowAtom(mce,vbits));
748   src_ty = typeOfIRExpr(mce->sb->tyenv, vbits);
749
750   /* Fast-track some common cases */
751   if (src_ty == Ity_I32 && dst_ty == Ity_I32)
752      return assignNew('V', mce, Ity_I32, unop(Iop_CmpwNEZ32, vbits));
753
754   if (src_ty == Ity_I64 && dst_ty == Ity_I64)
755      return assignNew('V', mce, Ity_I64, unop(Iop_CmpwNEZ64, vbits));
756
757   if (src_ty == Ity_I32 && dst_ty == Ity_I64) {
758      /* PCast the arg, then clone it. */
759      IRAtom* tmp = assignNew('V', mce, Ity_I32, unop(Iop_CmpwNEZ32, vbits));
760      return assignNew('V', mce, Ity_I64, binop(Iop_32HLto64, tmp, tmp));
761   }
762
763   if (src_ty == Ity_I64 && dst_ty == Ity_I32) {
764      /* PCast the arg.  This gives all 0s or all 1s.  Then throw away
765         the top half. */
766      IRAtom* tmp = assignNew('V', mce, Ity_I64, unop(Iop_CmpwNEZ64, vbits));
767      return assignNew('V', mce, Ity_I32, unop(Iop_64to32, tmp));
768   }
769
770   /* Else do it the slow way .. */
771   /* First of all, collapse vbits down to a single bit. */
772   tmp1   = NULL;
773   switch (src_ty) {
774      case Ity_I1:
775         tmp1 = vbits;
776         break;
777      case Ity_I8:
778         tmp1 = assignNew('V', mce, Ity_I1, unop(Iop_CmpNEZ8, vbits));
779         break;
780      case Ity_I16:
781         tmp1 = assignNew('V', mce, Ity_I1, unop(Iop_CmpNEZ16, vbits));
782         break;
783      case Ity_I32:
784         tmp1 = assignNew('V', mce, Ity_I1, unop(Iop_CmpNEZ32, vbits));
785         break;
786      case Ity_I64:
787         tmp1 = assignNew('V', mce, Ity_I1, unop(Iop_CmpNEZ64, vbits));
788         break;
789      case Ity_I128: {
790         /* Gah.  Chop it in half, OR the halves together, and compare
791            that with zero. */
792         IRAtom* tmp2 = assignNew('V', mce, Ity_I64, unop(Iop_128HIto64, vbits));
793         IRAtom* tmp3 = assignNew('V', mce, Ity_I64, unop(Iop_128to64, vbits));
794         IRAtom* tmp4 = assignNew('V', mce, Ity_I64, binop(Iop_Or64, tmp2, tmp3));
795         tmp1         = assignNew('V', mce, Ity_I1,
796                                       unop(Iop_CmpNEZ64, tmp4));
797         break;
798      }
799      default:
800         ppIRType(src_ty);
801         VG_(tool_panic)("mkPCastTo(1)");
802   }
803   tl_assert(tmp1);
804   /* Now widen up to the dst type. */
805   switch (dst_ty) {
806      case Ity_I1:
807         return tmp1;
808      case Ity_I8:
809         return assignNew('V', mce, Ity_I8, unop(Iop_1Sto8, tmp1));
810      case Ity_I16:
811         return assignNew('V', mce, Ity_I16, unop(Iop_1Sto16, tmp1));
812      case Ity_I32:
813         return assignNew('V', mce, Ity_I32, unop(Iop_1Sto32, tmp1));
814      case Ity_I64:
815         return assignNew('V', mce, Ity_I64, unop(Iop_1Sto64, tmp1));
816      case Ity_V128:
817         tmp1 = assignNew('V', mce, Ity_I64,  unop(Iop_1Sto64, tmp1));
818         tmp1 = assignNew('V', mce, Ity_V128, binop(Iop_64HLtoV128, tmp1, tmp1));
819         return tmp1;
820      case Ity_I128:
821         tmp1 = assignNew('V', mce, Ity_I64,  unop(Iop_1Sto64, tmp1));
822         tmp1 = assignNew('V', mce, Ity_I128, binop(Iop_64HLto128, tmp1, tmp1));
823         return tmp1;
824      default:
825         ppIRType(dst_ty);
826         VG_(tool_panic)("mkPCastTo(2)");
827   }
828}
829
830/* --------- Accurate interpretation of CmpEQ/CmpNE. --------- */
831/*
832   Normally, we can do CmpEQ/CmpNE by doing UifU on the arguments, and
833   PCasting to Ity_I1.  However, sometimes it is necessary to be more
834   accurate.  The insight is that the result is defined if two
835   corresponding bits can be found, one from each argument, so that
836   both bits are defined but are different -- that makes EQ say "No"
837   and NE say "Yes".  Hence, we compute an improvement term and DifD
838   it onto the "normal" (UifU) result.
839
840   The result is:
841
842   PCastTo<1> (
843      -- naive version
844      PCastTo<sz>( UifU<sz>(vxx, vyy) )
845
846      `DifD<sz>`
847
848      -- improvement term
849      PCastTo<sz>( PCast<sz>( CmpEQ<sz> ( vec, 1...1 ) ) )
850   )
851
852   where
853     vec contains 0 (defined) bits where the corresponding arg bits
854     are defined but different, and 1 bits otherwise.
855
856     vec = Or<sz>( vxx,   // 0 iff bit defined
857                   vyy,   // 0 iff bit defined
858                   Not<sz>(Xor<sz>( xx, yy )) // 0 iff bits different
859                 )
860
861     If any bit of vec is 0, the result is defined and so the
862     improvement term should produce 0...0, else it should produce
863     1...1.
864
865     Hence require for the improvement term:
866
867        if vec == 1...1 then 1...1 else 0...0
868     ->
869        PCast<sz>( CmpEQ<sz> ( vec, 1...1 ) )
870
871   This was extensively re-analysed and checked on 6 July 05.
872*/
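
/* Worked example (I32): xx = 0x00000001 with only its top bit
   undefined (vxx = 0x80000000), compared against a fully defined
   yy = 0x00000000 (vyy = 0x00000000).  Bit 0 is defined in both and
   differs, so the comparison result is certainly known.  The naive
   term PCasts UifU = 0x80000000 to "undefined", but

      vec = vxx | vyy | ~(xx ^ yy) = 0x80000000 | 0xFFFFFFFE = 0xFFFFFFFE

   which is not all-ones, so the improvement term is 0...0 and the
   DifD (And) yields a defined result, as desired. */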
873static IRAtom* expensiveCmpEQorNE ( MCEnv*  mce,
874                                    IRType  ty,
875                                    IRAtom* vxx, IRAtom* vyy,
876                                    IRAtom* xx,  IRAtom* yy )
877{
878   IRAtom *naive, *vec, *improvement_term;
879   IRAtom *improved, *final_cast, *top;
880   IROp   opDIFD, opUIFU, opXOR, opNOT, opCMP, opOR;
881
882   tl_assert(isShadowAtom(mce,vxx));
883   tl_assert(isShadowAtom(mce,vyy));
884   tl_assert(isOriginalAtom(mce,xx));
885   tl_assert(isOriginalAtom(mce,yy));
886   tl_assert(sameKindedAtoms(vxx,xx));
887   tl_assert(sameKindedAtoms(vyy,yy));
888
889   switch (ty) {
890      case Ity_I16:
891         opOR   = Iop_Or16;
892         opDIFD = Iop_And16;
893         opUIFU = Iop_Or16;
894         opNOT  = Iop_Not16;
895         opXOR  = Iop_Xor16;
896         opCMP  = Iop_CmpEQ16;
897         top    = mkU16(0xFFFF);
898         break;
899      case Ity_I32:
900         opOR   = Iop_Or32;
901         opDIFD = Iop_And32;
902         opUIFU = Iop_Or32;
903         opNOT  = Iop_Not32;
904         opXOR  = Iop_Xor32;
905         opCMP  = Iop_CmpEQ32;
906         top    = mkU32(0xFFFFFFFF);
907         break;
908      case Ity_I64:
909         opOR   = Iop_Or64;
910         opDIFD = Iop_And64;
911         opUIFU = Iop_Or64;
912         opNOT  = Iop_Not64;
913         opXOR  = Iop_Xor64;
914         opCMP  = Iop_CmpEQ64;
915         top    = mkU64(0xFFFFFFFFFFFFFFFFULL);
916         break;
917      default:
918         VG_(tool_panic)("expensiveCmpEQorNE");
919   }
920
921   naive
922      = mkPCastTo(mce,ty,
923                  assignNew('V', mce, ty, binop(opUIFU, vxx, vyy)));
924
925   vec
926      = assignNew(
927           'V', mce,ty,
928           binop( opOR,
929                  assignNew('V', mce,ty, binop(opOR, vxx, vyy)),
930                  assignNew(
931                     'V', mce,ty,
932                     unop( opNOT,
933                           assignNew('V', mce,ty, binop(opXOR, xx, yy))))));
934
935   improvement_term
936      = mkPCastTo( mce,ty,
937                   assignNew('V', mce,Ity_I1, binop(opCMP, vec, top)));
938
939   improved
940      = assignNew( 'V', mce,ty, binop(opDIFD, naive, improvement_term) );
941
942   final_cast
943      = mkPCastTo( mce, Ity_I1, improved );
944
945   return final_cast;
946}
947
948
949/* --------- Semi-accurate interpretation of CmpORD. --------- */
950
951/* CmpORD32{S,U} does PowerPC-style 3-way comparisons:
952
953      CmpORD32S(x,y) = 1<<3   if  x <s y
954                     = 1<<2   if  x >s y
955                     = 1<<1   if  x == y
956
957   and similarly the unsigned variant.  The default interpretation is:
958
959      CmpORD32{S,U}#(x,y,x#,y#) = PCast(x# `UifU` y#)
960                                  & (7<<1)
961
962   The "& (7<<1)" reflects the fact that all result bits except 3,2,1
963   are zero and therefore defined (viz, zero).
964
965   Also deal with a special case better:
966
967      CmpORD32S(x,0)
968
969   Here, bit 3 (LT) of the result is a copy of the top bit of x and
970   will be defined even if the rest of x isn't.  In which case we do:
971
972      CmpORD32S#(x,x#,0,{impliedly 0}#)
973         = PCast(x#) & (3<<1)      -- standard interp for GT#,EQ#
974           | (x# >>u 31) << 3      -- LT# = x#[31]
975
976   Analogous handling for CmpORD64{S,U}.
977*/
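
/* Example of the special case (32-bit): for CmpORD32S(x,0) with
   x# = 0x00000001 (only bit 0 of x undefined):

      PCast(x#) & (3<<1)   = 0xFFFFFFFF & 6 = 6    -- GT#,EQ# undefined
      ((x# >>u 31) << 3)   = 0                     -- LT# defined

   so the result shadow is 0x6: the LT bit is known even though the
   rest of x is not fully defined. */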
978static Bool isZeroU32 ( IRAtom* e )
979{
980   return
981      toBool( e->tag == Iex_Const
982              && e->Iex.Const.con->tag == Ico_U32
983              && e->Iex.Const.con->Ico.U32 == 0 );
984}
985
986static Bool isZeroU64 ( IRAtom* e )
987{
988   return
989      toBool( e->tag == Iex_Const
990              && e->Iex.Const.con->tag == Ico_U64
991              && e->Iex.Const.con->Ico.U64 == 0 );
992}
993
994static IRAtom* doCmpORD ( MCEnv*  mce,
995                          IROp    cmp_op,
996                          IRAtom* xxhash, IRAtom* yyhash,
997                          IRAtom* xx,     IRAtom* yy )
998{
999   Bool   m64    = cmp_op == Iop_CmpORD64S || cmp_op == Iop_CmpORD64U;
1000   Bool   syned  = cmp_op == Iop_CmpORD64S || cmp_op == Iop_CmpORD32S;
1001   IROp   opOR   = m64 ? Iop_Or64  : Iop_Or32;
1002   IROp   opAND  = m64 ? Iop_And64 : Iop_And32;
1003   IROp   opSHL  = m64 ? Iop_Shl64 : Iop_Shl32;
1004   IROp   opSHR  = m64 ? Iop_Shr64 : Iop_Shr32;
1005   IRType ty     = m64 ? Ity_I64   : Ity_I32;
1006   Int    width  = m64 ? 64        : 32;
1007
1008   Bool (*isZero)(IRAtom*) = m64 ? isZeroU64 : isZeroU32;
1009
1010   IRAtom* threeLeft1 = NULL;
1011   IRAtom* sevenLeft1 = NULL;
1012
1013   tl_assert(isShadowAtom(mce,xxhash));
1014   tl_assert(isShadowAtom(mce,yyhash));
1015   tl_assert(isOriginalAtom(mce,xx));
1016   tl_assert(isOriginalAtom(mce,yy));
1017   tl_assert(sameKindedAtoms(xxhash,xx));
1018   tl_assert(sameKindedAtoms(yyhash,yy));
1019   tl_assert(cmp_op == Iop_CmpORD32S || cmp_op == Iop_CmpORD32U
1020             || cmp_op == Iop_CmpORD64S || cmp_op == Iop_CmpORD64U);
1021
1022   if (0) {
1023      ppIROp(cmp_op); VG_(printf)(" ");
1024      ppIRExpr(xx); VG_(printf)(" "); ppIRExpr( yy ); VG_(printf)("\n");
1025   }
1026
1027   if (syned && isZero(yy)) {
1028      /* fancy interpretation */
1029      /* if yy is zero, then it must be fully defined (zero#). */
1030      tl_assert(isZero(yyhash));
1031      threeLeft1 = m64 ? mkU64(3<<1) : mkU32(3<<1);
1032      return
1033         binop(
1034            opOR,
1035            assignNew(
1036               'V', mce,ty,
1037               binop(
1038                  opAND,
1039                  mkPCastTo(mce,ty, xxhash),
1040                  threeLeft1
1041               )),
1042            assignNew(
1043               'V', mce,ty,
1044               binop(
1045                  opSHL,
1046                  assignNew(
1047                     'V', mce,ty,
1048                     binop(opSHR, xxhash, mkU8(width-1))),
1049                  mkU8(3)
1050               ))
1051	 );
1052   } else {
1053      /* standard interpretation */
1054      sevenLeft1 = m64 ? mkU64(7<<1) : mkU32(7<<1);
1055      return
1056         binop(
1057            opAND,
1058            mkPCastTo( mce,ty,
1059                       mkUifU(mce,ty, xxhash,yyhash)),
1060            sevenLeft1
1061         );
1062   }
1063}
1064
1065
1066/*------------------------------------------------------------*/
1067/*--- Emit a test and complaint if something is undefined. ---*/
1068/*------------------------------------------------------------*/
1069
1070static IRAtom* schemeE ( MCEnv* mce, IRExpr* e ); /* fwds */
1071
1072
1073/* Set the annotations on a dirty helper to indicate that the stack
1074   pointer and instruction pointer might be read.  This is the
1075   behaviour of all 'emit-a-complaint' style functions we might
1076   call. */
1077
1078static void setHelperAnns ( MCEnv* mce, IRDirty* di ) {
1079   di->nFxState = 2;
1080   di->fxState[0].fx        = Ifx_Read;
1081   di->fxState[0].offset    = mce->layout->offset_SP;
1082   di->fxState[0].size      = mce->layout->sizeof_SP;
1083   di->fxState[0].nRepeats  = 0;
1084   di->fxState[0].repeatLen = 0;
1085   di->fxState[1].fx        = Ifx_Read;
1086   di->fxState[1].offset    = mce->layout->offset_IP;
1087   di->fxState[1].size      = mce->layout->sizeof_IP;
1088   di->fxState[1].nRepeats  = 0;
1089   di->fxState[1].repeatLen = 0;
1090}
1091
1092
1093/* Check the supplied **original** atom for undefinedness, and emit a
1094   complaint if so.  Once that happens, mark it as defined.  This is
1095   possible because the atom is either a tmp or literal.  If it's a
1096   tmp, it will be shadowed by a tmp, and so we can set the shadow to
1097   be defined.  In fact as mentioned above, we will have to allocate a
1098   new tmp to carry the new 'defined' shadow value, and update the
1099   original->tmp mapping accordingly; we cannot simply assign a new
1100   value to an existing shadow tmp as this breaks SSAness -- resulting
1101   in the post-instrumentation sanity checker spluttering in disapproval.
1102*/
1103static void complainIfUndefined ( MCEnv* mce, IRAtom* atom, IRExpr *guard )
1104{
1105   IRAtom*  vatom;
1106   IRType   ty;
1107   Int      sz;
1108   IRDirty* di;
1109   IRAtom*  cond;
1110   IRAtom*  origin;
1111   void*    fn;
1112   const HChar* nm;
1113   IRExpr** args;
1114   Int      nargs;
1115
1116   // Don't do V bit tests if we're not reporting undefined value errors.
1117   if (MC_(clo_mc_level) == 1)
1118      return;
1119
1120   /* Since the original expression is atomic, there's no duplicated
1121      work generated by making multiple V-expressions for it.  So we
1122      don't really care about the possibility that someone else may
1123   also create a V-interpretation for it. */
1124   tl_assert(isOriginalAtom(mce, atom));
1125   vatom = expr2vbits( mce, atom );
1126   tl_assert(isShadowAtom(mce, vatom));
1127   tl_assert(sameKindedAtoms(atom, vatom));
1128
1129   ty = typeOfIRExpr(mce->sb->tyenv, vatom);
1130
1131   /* sz is only used for constructing the error message */
1132   sz = ty==Ity_I1 ? 0 : sizeofIRType(ty);
1133
1134   cond = mkPCastTo( mce, Ity_I1, vatom );
1135   /* cond will be 0 if all defined, and 1 if any not defined. */
1136
1137   /* Get the origin info for the value we are about to check.  At
1138      least, if we are doing origin tracking.  If not, use a dummy
1139      zero origin. */
1140   if (MC_(clo_mc_level) == 3) {
1141      origin = schemeE( mce, atom );
1142      if (mce->hWordTy == Ity_I64) {
1143         origin = assignNew( 'B', mce, Ity_I64, unop(Iop_32Uto64, origin) );
1144      }
1145   } else {
1146      origin = NULL;
1147   }
1148
1149   fn    = NULL;
1150   nm    = NULL;
1151   args  = NULL;
1152   nargs = -1;
1153
1154   switch (sz) {
1155      case 0:
1156         if (origin) {
1157            fn    = &MC_(helperc_value_check0_fail_w_o);
1158            nm    = "MC_(helperc_value_check0_fail_w_o)";
1159            args  = mkIRExprVec_1(origin);
1160            nargs = 1;
1161         } else {
1162            fn    = &MC_(helperc_value_check0_fail_no_o);
1163            nm    = "MC_(helperc_value_check0_fail_no_o)";
1164            args  = mkIRExprVec_0();
1165            nargs = 0;
1166         }
1167         break;
1168      case 1:
1169         if (origin) {
1170            fn    = &MC_(helperc_value_check1_fail_w_o);
1171            nm    = "MC_(helperc_value_check1_fail_w_o)";
1172            args  = mkIRExprVec_1(origin);
1173            nargs = 1;
1174         } else {
1175            fn    = &MC_(helperc_value_check1_fail_no_o);
1176            nm    = "MC_(helperc_value_check1_fail_no_o)";
1177            args  = mkIRExprVec_0();
1178            nargs = 0;
1179         }
1180         break;
1181      case 4:
1182         if (origin) {
1183            fn    = &MC_(helperc_value_check4_fail_w_o);
1184            nm    = "MC_(helperc_value_check4_fail_w_o)";
1185            args  = mkIRExprVec_1(origin);
1186            nargs = 1;
1187         } else {
1188            fn    = &MC_(helperc_value_check4_fail_no_o);
1189            nm    = "MC_(helperc_value_check4_fail_no_o)";
1190            args  = mkIRExprVec_0();
1191            nargs = 0;
1192         }
1193         break;
1194      case 8:
1195         if (origin) {
1196            fn    = &MC_(helperc_value_check8_fail_w_o);
1197            nm    = "MC_(helperc_value_check8_fail_w_o)";
1198            args  = mkIRExprVec_1(origin);
1199            nargs = 1;
1200         } else {
1201            fn    = &MC_(helperc_value_check8_fail_no_o);
1202            nm    = "MC_(helperc_value_check8_fail_no_o)";
1203            args  = mkIRExprVec_0();
1204            nargs = 0;
1205         }
1206         break;
1207      case 2:
1208      case 16:
1209         if (origin) {
1210            fn    = &MC_(helperc_value_checkN_fail_w_o);
1211            nm    = "MC_(helperc_value_checkN_fail_w_o)";
1212            args  = mkIRExprVec_2( mkIRExpr_HWord( sz ), origin);
1213            nargs = 2;
1214         } else {
1215            fn    = &MC_(helperc_value_checkN_fail_no_o);
1216            nm    = "MC_(helperc_value_checkN_fail_no_o)";
1217            args  = mkIRExprVec_1( mkIRExpr_HWord( sz ) );
1218            nargs = 1;
1219         }
1220         break;
1221      default:
1222         VG_(tool_panic)("unexpected szB");
1223   }
1224
1225   tl_assert(fn);
1226   tl_assert(nm);
1227   tl_assert(args);
1228   tl_assert(nargs >= 0 && nargs <= 2);
1229   tl_assert( (MC_(clo_mc_level) == 3 && origin != NULL)
1230              || (MC_(clo_mc_level) == 2 && origin == NULL) );
1231
1232   di = unsafeIRDirty_0_N( nargs/*regparms*/, nm,
1233                           VG_(fnptr_to_fnentry)( fn ), args );
1234   di->guard = cond;
1235
1236   /* If the complaint is to be issued under a guard condition, AND that
1237      guard condition. */
1238   if (guard) {
1239     IRAtom *g1 = assignNew('V', mce, Ity_I32, unop(Iop_1Uto32, di->guard));
1240     IRAtom *g2 = assignNew('V', mce, Ity_I32, unop(Iop_1Uto32, guard));
1241     IRAtom *e  = assignNew('V', mce, Ity_I32, binop(Iop_And32, g1, g2));
1242
1243     di->guard = assignNew('V', mce, Ity_I1, unop(Iop_32to1, e));
1244   }
1245
1246   setHelperAnns( mce, di );
1247   stmt( 'V', mce, IRStmt_Dirty(di));
1248
1249   /* Set the shadow tmp to be defined.  First, update the
1250      orig->shadow tmp mapping to reflect the fact that this shadow is
1251      getting a new value. */
1252   tl_assert(isIRAtom(vatom));
1253   /* sameKindedAtoms ... */
1254   if (vatom->tag == Iex_RdTmp) {
1255      tl_assert(atom->tag == Iex_RdTmp);
1256      newShadowTmpV(mce, atom->Iex.RdTmp.tmp);
1257      assign('V', mce, findShadowTmpV(mce, atom->Iex.RdTmp.tmp),
1258                       definedOfType(ty));
1259   }
1260}
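
/* In sketch form, for a 32-bit tmp t with shadow t# and origin
   tracking off, the statements emitted by the above are roughly:

      t_cond = CmpNEZ32(t#)        -- 1 iff any bit of t is undefined
      DIRTY t_cond ::: MC_(helperc_value_check4_fail_no_o)()
      t#'    = 0x0:I32             -- fresh shadow for t, now 'defined'

   (Names are illustrative; the actual temporaries are whatever newTemp
   hands out.) */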
1261
1262
1263/*------------------------------------------------------------*/
1264/*--- Shadowing PUTs/GETs, and indexed variants thereof    ---*/
1265/*------------------------------------------------------------*/
1266
1267/* Examine the always-defined sections declared in layout to see if
1268   the (offset,size) section is within one.  Note, it is an error to
1269   partially fall into such a region: (offset,size) should either be
1270   completely in such a region or completely not-in such a region.
1271*/
1272static Bool isAlwaysDefd ( MCEnv* mce, Int offset, Int size )
1273{
1274   Int minoffD, maxoffD, i;
1275   Int minoff = offset;
1276   Int maxoff = minoff + size - 1;
1277   tl_assert((minoff & ~0xFFFF) == 0);
1278   tl_assert((maxoff & ~0xFFFF) == 0);
1279
1280   for (i = 0; i < mce->layout->n_alwaysDefd; i++) {
1281      minoffD = mce->layout->alwaysDefd[i].offset;
1282      maxoffD = minoffD + mce->layout->alwaysDefd[i].size - 1;
1283      tl_assert((minoffD & ~0xFFFF) == 0);
1284      tl_assert((maxoffD & ~0xFFFF) == 0);
1285
1286      if (maxoff < minoffD || maxoffD < minoff)
1287         continue; /* no overlap */
1288      if (minoff >= minoffD && maxoff <= maxoffD)
1289         return True; /* completely contained in an always-defd section */
1290
1291      VG_(tool_panic)("memcheck:isAlwaysDefd:partial overlap");
1292   }
1293   return False; /* could not find any containing section */
1294}
1295
1296
1297/* Generate into bb suitable actions to shadow this Put.  If the state
1298   slice is marked 'always defined', do nothing.  Otherwise, write the
1299   supplied V bits to the shadow state.  We can pass in either an
1300   original atom or a V-atom, but not both.  In the former case the
1301   relevant V-bits are then generated from the original.
1302   We assume here that the definedness of GUARD has already been checked.
1303*/
1304static
1305void do_shadow_PUT ( MCEnv* mce,  Int offset,
1306                     IRAtom* atom, IRAtom* vatom, IRExpr *guard )
1307{
1308   IRType ty;
1309
1310   // Don't do shadow PUTs if we're not doing undefined value checking.
1311   // Their absence lets Vex's optimiser remove all the shadow computation
1312   // that they depend on, which includes GETs of the shadow registers.
1313   if (MC_(clo_mc_level) == 1)
1314      return;
1315
1316   if (atom) {
1317      tl_assert(!vatom);
1318      tl_assert(isOriginalAtom(mce, atom));
1319      vatom = expr2vbits( mce, atom );
1320   } else {
1321      tl_assert(vatom);
1322      tl_assert(isShadowAtom(mce, vatom));
1323   }
1324
1325   ty = typeOfIRExpr(mce->sb->tyenv, vatom);
1326   tl_assert(ty != Ity_I1);
1327   tl_assert(ty != Ity_I128);
1328   if (isAlwaysDefd(mce, offset, sizeofIRType(ty))) {
1329      /* later: no ... */
1330      /* emit code to emit a complaint if any of the vbits are 1. */
1331      /* complainIfUndefined(mce, atom); */
1332   } else {
1333      /* Do a plain shadow Put. */
1334      if (guard) {
1335         /* If the guard expression evaluates to false we simply Put the value
1336            that is already stored in the guest state slot */
1337         IRAtom *cond, *iffalse;
1338
1339         cond    = assignNew('V', mce, Ity_I8, unop(Iop_1Uto8, guard));
1340         iffalse = assignNew('V', mce, ty,
1341                             IRExpr_Get(offset + mce->layout->total_sizeB, ty));
1342         vatom   = assignNew('V', mce, ty, IRExpr_Mux0X(cond, iffalse, vatom));
1343      }
1344      stmt( 'V', mce, IRStmt_Put( offset + mce->layout->total_sizeB, vatom ));
1345   }
1346}
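
/* Sketch of the guarded case above: for "if (guard) PUT(offs) = e",
   the shadow state is updated with

      cond    = 1Uto8(guard)
      iffalse = GET:ty(offs + layout->total_sizeB)   -- current shadow
      vnew    = Mux0X(cond, iffalse, vatom)
      PUT(offs + layout->total_sizeB) = vnew

   so that when the guard is false the shadow slot keeps its old
   value. */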
1347
1348
1349/* Generate into bb suitable actions to shadow this PutI.  If the state
1350   slice is marked 'always defined', do nothing.  Otherwise, write the
1351   supplied V bits to the shadow of the state array. */
1352static
1353void do_shadow_PUTI ( MCEnv* mce, IRPutI *puti)
1354{
1355   IRAtom* vatom;
1356   IRType  ty, tyS;
1357   Int     arrSize;
1358   IRRegArray* descr = puti->descr;
1359   IRAtom*     ix    = puti->ix;
1360   Int         bias  = puti->bias;
1361   IRAtom*     atom  = puti->data;
1362
1363   // Don't do shadow PUTIs if we're not doing undefined value checking.
1364   // Their absence lets Vex's optimiser remove all the shadow computation
1365   // that they depend on, which includes GETIs of the shadow registers.
1366   if (MC_(clo_mc_level) == 1)
1367      return;
1368
1369   tl_assert(isOriginalAtom(mce,atom));
1370   vatom = expr2vbits( mce, atom );
1371   tl_assert(sameKindedAtoms(atom, vatom));
1372   ty   = descr->elemTy;
1373   tyS  = shadowTypeV(ty);
1374   arrSize = descr->nElems * sizeofIRType(ty);
1375   tl_assert(ty != Ity_I1);
1376   tl_assert(isOriginalAtom(mce,ix));
1377   complainIfUndefined(mce, ix, NULL);
1378   if (isAlwaysDefd(mce, descr->base, arrSize)) {
1379      /* later: no ... */
1380      /* emit code to emit a complaint if any of the vbits are 1. */
1381      /* complainIfUndefined(mce, atom); */
1382   } else {
1383      /* Do a cloned version of the Put that refers to the shadow
1384         area. */
1385      IRRegArray* new_descr
1386         = mkIRRegArray( descr->base + mce->layout->total_sizeB,
1387                         tyS, descr->nElems);
1388      stmt( 'V', mce, IRStmt_PutI( mkIRPutI(new_descr, ix, bias, vatom) ));
1389   }
1390}
1391
1392
1393/* Return an expression which contains the V bits corresponding to the
1394   given GET (passed in in pieces).
1395*/
1396static
1397IRExpr* shadow_GET ( MCEnv* mce, Int offset, IRType ty )
1398{
1399   IRType tyS = shadowTypeV(ty);
1400   tl_assert(ty != Ity_I1);
1401   tl_assert(ty != Ity_I128);
1402   if (isAlwaysDefd(mce, offset, sizeofIRType(ty))) {
1403      /* Always defined, return all zeroes of the relevant type */
1404      return definedOfType(tyS);
1405   } else {
1406      /* return a cloned version of the Get that refers to the shadow
1407         area. */
1408      /* FIXME: this isn't an atom! */
1409      return IRExpr_Get( offset + mce->layout->total_sizeB, tyS );
1410   }
1411}
1412
1413
1414/* Return an expression which contains the V bits corresponding to the
1415   given GETI (passed in in pieces).
1416*/
1417static
1418IRExpr* shadow_GETI ( MCEnv* mce,
1419                      IRRegArray* descr, IRAtom* ix, Int bias )
1420{
1421   IRType ty   = descr->elemTy;
1422   IRType tyS  = shadowTypeV(ty);
1423   Int arrSize = descr->nElems * sizeofIRType(ty);
1424   tl_assert(ty != Ity_I1);
1425   tl_assert(isOriginalAtom(mce,ix));
1426   complainIfUndefined(mce, ix, NULL);
1427   if (isAlwaysDefd(mce, descr->base, arrSize)) {
1428      /* Always defined, return all zeroes of the relevant type */
1429      return definedOfType(tyS);
1430   } else {
1431      /* return a cloned version of the Get that refers to the shadow
1432         area. */
1433      IRRegArray* new_descr
1434         = mkIRRegArray( descr->base + mce->layout->total_sizeB,
1435                         tyS, descr->nElems);
1436      return IRExpr_GetI( new_descr, ix, bias );
1437   }
1438}
1439
1440
1441/*------------------------------------------------------------*/
1442/*--- Generating approximations for unknown operations,    ---*/
1443/*--- using lazy-propagate semantics                       ---*/
1444/*------------------------------------------------------------*/
1445
1446/* Lazy propagation of undefinedness from two values, resulting in the
1447   specified shadow type.
1448*/
1449static
1450IRAtom* mkLazy2 ( MCEnv* mce, IRType finalVty, IRAtom* va1, IRAtom* va2 )
1451{
1452   IRAtom* at;
1453   IRType t1 = typeOfIRExpr(mce->sb->tyenv, va1);
1454   IRType t2 = typeOfIRExpr(mce->sb->tyenv, va2);
1455   tl_assert(isShadowAtom(mce,va1));
1456   tl_assert(isShadowAtom(mce,va2));
1457
1458   /* The general case is inefficient because PCast is an expensive
1459      operation.  Here are some special cases which use PCast only
1460      once rather than twice. */
1461
1462   /* I64 x I64 -> I64 */
1463   if (t1 == Ity_I64 && t2 == Ity_I64 && finalVty == Ity_I64) {
1464      if (0) VG_(printf)("mkLazy2: I64 x I64 -> I64\n");
1465      at = mkUifU(mce, Ity_I64, va1, va2);
1466      at = mkPCastTo(mce, Ity_I64, at);
1467      return at;
1468   }
1469
1470   /* I64 x I64 -> I32 */
1471   if (t1 == Ity_I64 && t2 == Ity_I64 && finalVty == Ity_I32) {
1472      if (0) VG_(printf)("mkLazy2: I64 x I64 -> I32\n");
1473      at = mkUifU(mce, Ity_I64, va1, va2);
1474      at = mkPCastTo(mce, Ity_I32, at);
1475      return at;
1476   }
1477
1478   if (0) {
1479      VG_(printf)("mkLazy2 ");
1480      ppIRType(t1);
1481      VG_(printf)("_");
1482      ppIRType(t2);
1483      VG_(printf)("_");
1484      ppIRType(finalVty);
1485      VG_(printf)("\n");
1486   }
1487
1488   /* General case: force everything via 32-bit intermediaries. */
1489   at = mkPCastTo(mce, Ity_I32, va1);
1490   at = mkUifU(mce, Ity_I32, at, mkPCastTo(mce, Ity_I32, va2));
1491   at = mkPCastTo(mce, finalVty, at);
1492   return at;
1493}
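
/* A plausible use: for a binary operation whose semantics are not
   modelled precisely, with I64-shadowed arguments and an I64 result,
   a caller could simply do

      return mkLazy2(mce, Ity_I64, vatom1, vatom2);

   accepting the pessimistic "any undefined input poisons the whole
   output" approximation. */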
1494
1495
1496/* 3-arg version of the above. */
1497static
1498IRAtom* mkLazy3 ( MCEnv* mce, IRType finalVty,
1499                  IRAtom* va1, IRAtom* va2, IRAtom* va3 )
1500{
1501   IRAtom* at;
1502   IRType t1 = typeOfIRExpr(mce->sb->tyenv, va1);
1503   IRType t2 = typeOfIRExpr(mce->sb->tyenv, va2);
1504   IRType t3 = typeOfIRExpr(mce->sb->tyenv, va3);
1505   tl_assert(isShadowAtom(mce,va1));
1506   tl_assert(isShadowAtom(mce,va2));
1507   tl_assert(isShadowAtom(mce,va3));
1508
1509   /* The general case is inefficient because PCast is an expensive
1510      operation.  Here are some special cases which use PCast only
1511      twice rather than three times. */
1512
1513   /* I32 x I64 x I64 -> I64 */
1514   /* Standard FP idiom: rm x FParg1 x FParg2 -> FPresult */
1515   if (t1 == Ity_I32 && t2 == Ity_I64 && t3 == Ity_I64
1516       && finalVty == Ity_I64) {
1517      if (0) VG_(printf)("mkLazy3: I32 x I64 x I64 -> I64\n");
1518      /* Widen 1st arg to I64.  Since 1st arg is typically a rounding
1519         mode indication which is fully defined, this should get
1520         folded out later. */
1521      at = mkPCastTo(mce, Ity_I64, va1);
1522      /* Now fold in 2nd and 3rd args. */
1523      at = mkUifU(mce, Ity_I64, at, va2);
1524      at = mkUifU(mce, Ity_I64, at, va3);
1525      /* and PCast once again. */
1526      at = mkPCastTo(mce, Ity_I64, at);
1527      return at;
1528   }
1529
1530   /* I32 x I64 x I64 -> I32 */
1531   if (t1 == Ity_I32 && t2 == Ity_I64 && t3 == Ity_I64
1532       && finalVty == Ity_I32) {
1533      if (0) VG_(printf)("mkLazy3: I32 x I64 x I64 -> I32\n");
1534      at = mkPCastTo(mce, Ity_I64, va1);
1535      at = mkUifU(mce, Ity_I64, at, va2);
1536      at = mkUifU(mce, Ity_I64, at, va3);
1537      at = mkPCastTo(mce, Ity_I32, at);
1538      return at;
1539   }
1540
1541   /* I32 x I32 x I32 -> I32 */
1542   /* 32-bit FP idiom, as (eg) happens on ARM */
1543   if (t1 == Ity_I32 && t2 == Ity_I32 && t3 == Ity_I32
1544       && finalVty == Ity_I32) {
1545      if (0) VG_(printf)("mkLazy3: I32 x I32 x I32 -> I32\n");
1546      at = va1;
1547      at = mkUifU(mce, Ity_I32, at, va2);
1548      at = mkUifU(mce, Ity_I32, at, va3);
1549      at = mkPCastTo(mce, Ity_I32, at);
1550      return at;
1551   }
1552
1553   /* I32 x I128 x I128 -> I128 */
1554   /* Standard FP idiom: rm x FParg1 x FParg2 -> FPresult */
1555   if (t1 == Ity_I32 && t2 == Ity_I128 && t3 == Ity_I128
1556       && finalVty == Ity_I128) {
1557      if (0) VG_(printf)("mkLazy3: I32 x I128 x I128 -> I128\n");
1558      /* Widen 1st arg to I128.  Since 1st arg is typically a rounding
1559         mode indication which is fully defined, this should get
1560         folded out later. */
1561      at = mkPCastTo(mce, Ity_I128, va1);
1562      /* Now fold in 2nd and 3rd args. */
1563      at = mkUifU(mce, Ity_I128, at, va2);
1564      at = mkUifU(mce, Ity_I128, at, va3);
1565      /* and PCast once again. */
1566      at = mkPCastTo(mce, Ity_I128, at);
1567      return at;
1568   }
1569   if (1) {
1570      VG_(printf)("mkLazy3: ");
1571      ppIRType(t1);
1572      VG_(printf)(" x ");
1573      ppIRType(t2);
1574      VG_(printf)(" x ");
1575      ppIRType(t3);
1576      VG_(printf)(" -> ");
1577      ppIRType(finalVty);
1578      VG_(printf)("\n");
1579   }
1580
1581   tl_assert(0);
1582   /* General case: force everything via 32-bit intermediaries. */
1583   /*
1584   at = mkPCastTo(mce, Ity_I32, va1);
1585   at = mkUifU(mce, Ity_I32, at, mkPCastTo(mce, Ity_I32, va2));
1586   at = mkUifU(mce, Ity_I32, at, mkPCastTo(mce, Ity_I32, va3));
1587   at = mkPCastTo(mce, finalVty, at);
1588   return at;
1589   */
1590}
1591
1592
1593/* 4-arg version of the above. */
1594static
1595IRAtom* mkLazy4 ( MCEnv* mce, IRType finalVty,
1596                  IRAtom* va1, IRAtom* va2, IRAtom* va3, IRAtom* va4 )
1597{
1598   IRAtom* at;
1599   IRType t1 = typeOfIRExpr(mce->sb->tyenv, va1);
1600   IRType t2 = typeOfIRExpr(mce->sb->tyenv, va2);
1601   IRType t3 = typeOfIRExpr(mce->sb->tyenv, va3);
1602   IRType t4 = typeOfIRExpr(mce->sb->tyenv, va4);
1603   tl_assert(isShadowAtom(mce,va1));
1604   tl_assert(isShadowAtom(mce,va2));
1605   tl_assert(isShadowAtom(mce,va3));
1606   tl_assert(isShadowAtom(mce,va4));
1607
1608   /* The general case is inefficient because PCast is an expensive
1609      operation.  Here are some special cases which use PCast only
1610      twice rather than four times. */
1611
1612   /* I32 x I64 x I64 x I64 -> I64 */
1613   /* Standard FP idiom: rm x FParg1 x FParg2 x FParg3 -> FPresult */
1614   if (t1 == Ity_I32 && t2 == Ity_I64 && t3 == Ity_I64 && t4 == Ity_I64
1615       && finalVty == Ity_I64) {
1616      if (0) VG_(printf)("mkLazy4: I32 x I64 x I64 x I64 -> I64\n");
1617      /* Widen 1st arg to I64.  Since 1st arg is typically a rounding
1618         mode indication which is fully defined, this should get
1619         folded out later. */
1620      at = mkPCastTo(mce, Ity_I64, va1);
1621      /* Now fold in 2nd, 3rd, 4th args. */
1622      at = mkUifU(mce, Ity_I64, at, va2);
1623      at = mkUifU(mce, Ity_I64, at, va3);
1624      at = mkUifU(mce, Ity_I64, at, va4);
1625      /* and PCast once again. */
1626      at = mkPCastTo(mce, Ity_I64, at);
1627      return at;
1628   }
1629   /* I32 x I32 x I32 x I32 -> I32 */
1630   /* Standard FP idiom: rm x FParg1 x FParg2 x FParg3 -> FPresult */
1631   if (t1 == Ity_I32 && t2 == Ity_I32 && t3 == Ity_I32 && t4 == Ity_I32
1632       && finalVty == Ity_I32) {
1633      if (0) VG_(printf)("mkLazy4: I32 x I32 x I32 x I32 -> I32\n");
1634      at = va1;
1635      /* Now fold in 2nd, 3rd, 4th args. */
1636      at = mkUifU(mce, Ity_I32, at, va2);
1637      at = mkUifU(mce, Ity_I32, at, va3);
1638      at = mkUifU(mce, Ity_I32, at, va4);
1639      at = mkPCastTo(mce, Ity_I32, at);
1640      return at;
1641   }
1642
1643   if (1) {
1644      VG_(printf)("mkLazy4: ");
1645      ppIRType(t1);
1646      VG_(printf)(" x ");
1647      ppIRType(t2);
1648      VG_(printf)(" x ");
1649      ppIRType(t3);
1650      VG_(printf)(" x ");
1651      ppIRType(t4);
1652      VG_(printf)(" -> ");
1653      ppIRType(finalVty);
1654      VG_(printf)("\n");
1655   }
1656
1657   tl_assert(0);
1658}
1659
1660
1661/* Do the lazy propagation game from a null-terminated vector of
1662   atoms.  These are presumably the arguments to a helper call, so the
1663   IRCallee info is also supplied in order that we can know which
1664   arguments should be ignored (via the .mcx_mask field).
1665*/
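/* Illustrative example (arbitrary numbers, not from any real helper):
   for a call h(a0, a1, a2) whose IRCallee has mcx_mask == (1 << 1),
   a1 is ignored entirely, whilst the V bits of a0 and a2 are each
   PCast to the merge type, UifU'd together, and the accumulated value
   is finally PCast to finalVtype.  Hence an undefined bit in a0 or a2
   makes the whole result undefined, but a1 never can. */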
1666static
1667IRAtom* mkLazyN ( MCEnv* mce,
1668                  IRAtom** exprvec, IRType finalVtype, IRCallee* cee )
1669{
1670   Int     i;
1671   IRAtom* here;
1672   IRAtom* curr;
1673   IRType  mergeTy;
1674   Bool    mergeTy64 = True;
1675
1676   /* Decide on the type of the merge intermediary.  If all relevant
1677      args are I64, then it's I64.  In all other circumstances, use
1678      I32. */
1679   for (i = 0; exprvec[i]; i++) {
1680      tl_assert(i < 32);
1681      tl_assert(isOriginalAtom(mce, exprvec[i]));
1682      if (cee->mcx_mask & (1<<i))
1683         continue;
1684      if (typeOfIRExpr(mce->sb->tyenv, exprvec[i]) != Ity_I64)
1685         mergeTy64 = False;
1686   }
1687
1688   mergeTy = mergeTy64  ? Ity_I64  : Ity_I32;
1689   curr    = definedOfType(mergeTy);
1690
1691   for (i = 0; exprvec[i]; i++) {
1692      tl_assert(i < 32);
1693      tl_assert(isOriginalAtom(mce, exprvec[i]));
1694      /* Only take notice of this arg if the callee's mc-exclusion
1695         mask does not say it is to be excluded. */
1696      if (cee->mcx_mask & (1<<i)) {
1697         /* the arg is to be excluded from definedness checking.  Do
1698            nothing. */
1699         if (0) VG_(printf)("excluding %s(%d)\n", cee->name, i);
1700      } else {
1701         /* calculate the arg's definedness, and pessimistically merge
1702            it in. */
1703         here = mkPCastTo( mce, mergeTy, expr2vbits(mce, exprvec[i]) );
1704         curr = mergeTy64
1705                   ? mkUifU64(mce, here, curr)
1706                   : mkUifU32(mce, here, curr);
1707      }
1708   }
1709   return mkPCastTo(mce, finalVtype, curr );
1710}
1711
1712
1713/*------------------------------------------------------------*/
1714/*--- Generating expensive sequences for exact carry-chain ---*/
1715/*--- propagation in add/sub and related operations.       ---*/
1716/*------------------------------------------------------------*/
1717
1718static
1719IRAtom* expensiveAddSub ( MCEnv*  mce,
1720                          Bool    add,
1721                          IRType  ty,
1722                          IRAtom* qaa, IRAtom* qbb,
1723                          IRAtom* aa,  IRAtom* bb )
1724{
1725   IRAtom *a_min, *b_min, *a_max, *b_max;
1726   IROp   opAND, opOR, opXOR, opNOT, opADD, opSUB;
1727
1728   tl_assert(isShadowAtom(mce,qaa));
1729   tl_assert(isShadowAtom(mce,qbb));
1730   tl_assert(isOriginalAtom(mce,aa));
1731   tl_assert(isOriginalAtom(mce,bb));
1732   tl_assert(sameKindedAtoms(qaa,aa));
1733   tl_assert(sameKindedAtoms(qbb,bb));
1734
1735   switch (ty) {
1736      case Ity_I32:
1737         opAND = Iop_And32;
1738         opOR  = Iop_Or32;
1739         opXOR = Iop_Xor32;
1740         opNOT = Iop_Not32;
1741         opADD = Iop_Add32;
1742         opSUB = Iop_Sub32;
1743         break;
1744      case Ity_I64:
1745         opAND = Iop_And64;
1746         opOR  = Iop_Or64;
1747         opXOR = Iop_Xor64;
1748         opNOT = Iop_Not64;
1749         opADD = Iop_Add64;
1750         opSUB = Iop_Sub64;
1751         break;
1752      default:
1753         VG_(tool_panic)("expensiveAddSub");
1754   }
1755
1756   // a_min = aa & ~qaa
1757   a_min = assignNew('V', mce,ty,
1758                     binop(opAND, aa,
1759                                  assignNew('V', mce,ty, unop(opNOT, qaa))));
1760
1761   // b_min = bb & ~qbb
1762   b_min = assignNew('V', mce,ty,
1763                     binop(opAND, bb,
1764                                  assignNew('V', mce,ty, unop(opNOT, qbb))));
1765
1766   // a_max = aa | qaa
1767   a_max = assignNew('V', mce,ty, binop(opOR, aa, qaa));
1768
1769   // b_max = bb | qbb
1770   b_max = assignNew('V', mce,ty, binop(opOR, bb, qbb));
1771
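   /* Worked example (purely illustrative, 4-bit values): let
      aa = 0b0001 with qaa = 0b0001 (bit 0 undefined), and let
      bb = 0b0111 be fully defined (qbb = 0).  Then a_min = 0b0000,
      a_max = 0b0001, and for the add case
         a_min + b_min = 0b0111,   a_max + b_max = 0b1000
      whose XOR is 0b1111, so the result V bits are 0b1111: bits 1..3
      are flagged as undefined as well as bit 0, because the unknown
      input bit can flip the whole carry chain.  A plain bitwise UifU
      would have flagged only bit 0. */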
1772   if (add) {
1773      // result = (qaa | qbb) | ((a_min + b_min) ^ (a_max + b_max))
1774      return
1775      assignNew('V', mce,ty,
1776         binop( opOR,
1777                assignNew('V', mce,ty, binop(opOR, qaa, qbb)),
1778                assignNew('V', mce,ty,
1779                   binop( opXOR,
1780                          assignNew('V', mce,ty, binop(opADD, a_min, b_min)),
1781                          assignNew('V', mce,ty, binop(opADD, a_max, b_max))
1782                   )
1783                )
1784         )
1785      );
1786   } else {
1787      // result = (qaa | qbb) | ((a_min - b_max) ^ (a_max - b_min))
1788      return
1789      assignNew('V', mce,ty,
1790         binop( opOR,
1791                assignNew('V', mce,ty, binop(opOR, qaa, qbb)),
1792                assignNew('V', mce,ty,
1793                   binop( opXOR,
1794                          assignNew('V', mce,ty, binop(opSUB, a_min, b_max)),
1795                          assignNew('V', mce,ty, binop(opSUB, a_max, b_min))
1796                   )
1797                )
1798         )
1799      );
1800   }
1801
1802}
1803
1804
1805static
1806IRAtom* expensiveCountTrailingZeroes ( MCEnv* mce, IROp czop,
1807                                       IRAtom* atom, IRAtom* vatom )
1808{
1809   IRType ty;
1810   IROp xorOp, subOp, andOp;
1811   IRExpr *one;
1812   IRAtom *improver, *improved;
1813   tl_assert(isShadowAtom(mce,vatom));
1814   tl_assert(isOriginalAtom(mce,atom));
1815   tl_assert(sameKindedAtoms(atom,vatom));
1816
1817   switch (czop) {
1818      case Iop_Ctz32:
1819         ty = Ity_I32;
1820         xorOp = Iop_Xor32;
1821         subOp = Iop_Sub32;
1822         andOp = Iop_And32;
1823         one = mkU32(1);
1824         break;
1825      case Iop_Ctz64:
1826         ty = Ity_I64;
1827         xorOp = Iop_Xor64;
1828         subOp = Iop_Sub64;
1829         andOp = Iop_And64;
1830         one = mkU64(1);
1831         break;
1832      default:
1833         ppIROp(czop);
1834         VG_(tool_panic)("memcheck:expensiveCountTrailingZeroes");
1835   }
1836
1837   // improver = atom ^ (atom - 1)
1838   //
1839   // That is, improver has its low ctz(atom)+1 bits equal to one;
1840   // higher bits (if any) equal to zero.
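   // Worked example (illustrative): if atom = 0b...01000 then
   // atom - 1 = 0b...00111 and improver = 0b...01111, ie. the low
   // ctz(atom)+1 = 4 bits are set.  Only undefinedness in those bit
   // positions can change the count, so masking vatom with improver
   // below discards the irrelevant V bits.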
1841   improver = assignNew('V', mce,ty,
1842                        binop(xorOp,
1843                              atom,
1844                              assignNew('V', mce, ty,
1845                                        binop(subOp, atom, one))));
1846
1847   // improved = vatom & improver
1848   //
1849   // That is, treat any V bits above the first ctz(atom)+1 bits as
1850   // "defined".
1851   improved = assignNew('V', mce, ty,
1852                        binop(andOp, vatom, improver));
1853
1854   // Return pessimizing cast of improved.
1855   return mkPCastTo(mce, ty, improved);
1856}
1857
1858
1859/*------------------------------------------------------------*/
1860/*--- Scalar shifts.                                       ---*/
1861/*------------------------------------------------------------*/
1862
1863/* Produce an interpretation for (aa << bb) (or >>s, >>u).  The basic
1864   idea is to shift the definedness bits by the original shift amount.
1865   This introduces 0s ("defined") in new positions for left shifts and
1866   unsigned right shifts, and copies the top definedness bit for
1867   signed right shifts.  So, conveniently, applying the original shift
1868   operator to the definedness bits for the left arg is exactly the
1869   right thing to do:
1870
1871      (qaa << bb)
1872
1873   However if the shift amount is undefined then the whole result
1874   is undefined.  Hence need:
1875
1876      (qaa << bb) `UifU` PCast(qbb)
1877
1878   If the shift amount bb is a literal then qbb will say 'all defined'
1879   and the UifU and PCast will get folded out by post-instrumentation
1880   optimisation.
1881*/
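/* Illustrative example: for an 8-bit left shift by the constant 2,
   with qaa = 0b00000001 (only bit 0 of aa undefined), the shifted
   shadow is qaa << 2 = 0b00000100, and since bb is defined,
   PCast(qbb) is all zeroes; the UifU therefore leaves exactly bit 2
   of the result undefined, tracking where the unknown input bit
   lands. */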
1882static IRAtom* scalarShift ( MCEnv*  mce,
1883                             IRType  ty,
1884                             IROp    original_op,
1885                             IRAtom* qaa, IRAtom* qbb,
1886                             IRAtom* aa,  IRAtom* bb )
1887{
1888   tl_assert(isShadowAtom(mce,qaa));
1889   tl_assert(isShadowAtom(mce,qbb));
1890   tl_assert(isOriginalAtom(mce,aa));
1891   tl_assert(isOriginalAtom(mce,bb));
1892   tl_assert(sameKindedAtoms(qaa,aa));
1893   tl_assert(sameKindedAtoms(qbb,bb));
1894   return
1895      assignNew(
1896         'V', mce, ty,
1897         mkUifU( mce, ty,
1898                 assignNew('V', mce, ty, binop(original_op, qaa, bb)),
1899                 mkPCastTo(mce, ty, qbb)
1900         )
1901   );
1902}
1903
1904
1905/*------------------------------------------------------------*/
1906/*--- Helpers for dealing with vector primops.             ---*/
1907/*------------------------------------------------------------*/
1908
1909/* Vector pessimisation -- pessimise within each lane individually. */
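/* (Illustrative: eg. CmpNEZ16x8 turns each 16-bit lane of a shadow
   into 0x0000 if the lane is fully defined, or 0xFFFF if any bit in
   it is undefined -- the per-lane analogue of mkPCastTo.) */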
1910
1911static IRAtom* mkPCast8x16 ( MCEnv* mce, IRAtom* at )
1912{
1913   return assignNew('V', mce, Ity_V128, unop(Iop_CmpNEZ8x16, at));
1914}
1915
1916static IRAtom* mkPCast16x8 ( MCEnv* mce, IRAtom* at )
1917{
1918   return assignNew('V', mce, Ity_V128, unop(Iop_CmpNEZ16x8, at));
1919}
1920
1921static IRAtom* mkPCast32x4 ( MCEnv* mce, IRAtom* at )
1922{
1923   return assignNew('V', mce, Ity_V128, unop(Iop_CmpNEZ32x4, at));
1924}
1925
1926static IRAtom* mkPCast64x2 ( MCEnv* mce, IRAtom* at )
1927{
1928   return assignNew('V', mce, Ity_V128, unop(Iop_CmpNEZ64x2, at));
1929}
1930
1931static IRAtom* mkPCast64x4 ( MCEnv* mce, IRAtom* at )
1932{
1933   return assignNew('V', mce, Ity_V256, unop(Iop_CmpNEZ64x4, at));
1934}
1935
1936static IRAtom* mkPCast32x8 ( MCEnv* mce, IRAtom* at )
1937{
1938   return assignNew('V', mce, Ity_V256, unop(Iop_CmpNEZ32x8, at));
1939}
1940
1941static IRAtom* mkPCast32x2 ( MCEnv* mce, IRAtom* at )
1942{
1943   return assignNew('V', mce, Ity_I64, unop(Iop_CmpNEZ32x2, at));
1944}
1945
1946static IRAtom* mkPCast16x4 ( MCEnv* mce, IRAtom* at )
1947{
1948   return assignNew('V', mce, Ity_I64, unop(Iop_CmpNEZ16x4, at));
1949}
1950
1951static IRAtom* mkPCast8x8 ( MCEnv* mce, IRAtom* at )
1952{
1953   return assignNew('V', mce, Ity_I64, unop(Iop_CmpNEZ8x8, at));
1954}
1955
1956static IRAtom* mkPCast16x2 ( MCEnv* mce, IRAtom* at )
1957{
1958   return assignNew('V', mce, Ity_I32, unop(Iop_CmpNEZ16x2, at));
1959}
1960
1961static IRAtom* mkPCast8x4 ( MCEnv* mce, IRAtom* at )
1962{
1963   return assignNew('V', mce, Ity_I32, unop(Iop_CmpNEZ8x4, at));
1964}
1965
1966
1967/* Here's a simple scheme capable of handling ops derived from SSE1
1968   code, while only generating ops that can be efficiently
1969   implemented in SSE1. */
1970
1971/* All-lanes versions are straightforward:
1972
1973   binary32Fx4(x,y)   ==> PCast32x4(UifUV128(x#,y#))
1974
1975   unary32Fx4(x)      ==> PCast32x4(x#)
1976
1977   Lowest-lane-only versions are more complex:
1978
1979   binary32F0x4(x,y)  ==> SetV128lo32(
1980                             x#,
1981                             PCast32(V128to32(UifUV128(x#,y#)))
1982                          )
1983
1984   This is perhaps not so obvious.  In particular, it's faster to
1985   do a V128-bit UifU and then take the bottom 32 bits than the more
1986   obvious scheme of taking the bottom 32 bits of each operand
1987   and doing a 32-bit UifU.  Basically, this is because UifU is fast
1988   and chopping lanes off vector values is slow.
1989
1990   Finally:
1991
1992   unary32F0x4(x)     ==> SetV128lo32(
1993                             x#,
1994                             PCast32(V128to32(x#))
1995                          )
1996
1997   Where:
1998
1999   PCast32(v#)   = 1Sto32(CmpNE32(v#,0))
2000   PCast32x4(v#) = CmpNEZ32x4(v#)
2001*/
2002
2003static
2004IRAtom* binary32Fx4 ( MCEnv* mce, IRAtom* vatomX, IRAtom* vatomY )
2005{
2006   IRAtom* at;
2007   tl_assert(isShadowAtom(mce, vatomX));
2008   tl_assert(isShadowAtom(mce, vatomY));
2009   at = mkUifUV128(mce, vatomX, vatomY);
2010   at = assignNew('V', mce, Ity_V128, mkPCast32x4(mce, at));
2011   return at;
2012}
2013
2014static
2015IRAtom* unary32Fx4 ( MCEnv* mce, IRAtom* vatomX )
2016{
2017   IRAtom* at;
2018   tl_assert(isShadowAtom(mce, vatomX));
2019   at = assignNew('V', mce, Ity_V128, mkPCast32x4(mce, vatomX));
2020   return at;
2021}
2022
2023static
2024IRAtom* binary32F0x4 ( MCEnv* mce, IRAtom* vatomX, IRAtom* vatomY )
2025{
2026   IRAtom* at;
2027   tl_assert(isShadowAtom(mce, vatomX));
2028   tl_assert(isShadowAtom(mce, vatomY));
2029   at = mkUifUV128(mce, vatomX, vatomY);
2030   at = assignNew('V', mce, Ity_I32, unop(Iop_V128to32, at));
2031   at = mkPCastTo(mce, Ity_I32, at);
2032   at = assignNew('V', mce, Ity_V128, binop(Iop_SetV128lo32, vatomX, at));
2033   return at;
2034}
2035
2036static
2037IRAtom* unary32F0x4 ( MCEnv* mce, IRAtom* vatomX )
2038{
2039   IRAtom* at;
2040   tl_assert(isShadowAtom(mce, vatomX));
2041   at = assignNew('V', mce, Ity_I32, unop(Iop_V128to32, vatomX));
2042   at = mkPCastTo(mce, Ity_I32, at);
2043   at = assignNew('V', mce, Ity_V128, binop(Iop_SetV128lo32, vatomX, at));
2044   return at;
2045}
2046
2047/* --- ... and ... 64Fx2 versions of the same ... --- */
2048
2049static
2050IRAtom* binary64Fx2 ( MCEnv* mce, IRAtom* vatomX, IRAtom* vatomY )
2051{
2052   IRAtom* at;
2053   tl_assert(isShadowAtom(mce, vatomX));
2054   tl_assert(isShadowAtom(mce, vatomY));
2055   at = mkUifUV128(mce, vatomX, vatomY);
2056   at = assignNew('V', mce, Ity_V128, mkPCast64x2(mce, at));
2057   return at;
2058}
2059
2060static
2061IRAtom* unary64Fx2 ( MCEnv* mce, IRAtom* vatomX )
2062{
2063   IRAtom* at;
2064   tl_assert(isShadowAtom(mce, vatomX));
2065   at = assignNew('V', mce, Ity_V128, mkPCast64x2(mce, vatomX));
2066   return at;
2067}
2068
2069static
2070IRAtom* binary64F0x2 ( MCEnv* mce, IRAtom* vatomX, IRAtom* vatomY )
2071{
2072   IRAtom* at;
2073   tl_assert(isShadowAtom(mce, vatomX));
2074   tl_assert(isShadowAtom(mce, vatomY));
2075   at = mkUifUV128(mce, vatomX, vatomY);
2076   at = assignNew('V', mce, Ity_I64, unop(Iop_V128to64, at));
2077   at = mkPCastTo(mce, Ity_I64, at);
2078   at = assignNew('V', mce, Ity_V128, binop(Iop_SetV128lo64, vatomX, at));
2079   return at;
2080}
2081
2082static
2083IRAtom* unary64F0x2 ( MCEnv* mce, IRAtom* vatomX )
2084{
2085   IRAtom* at;
2086   tl_assert(isShadowAtom(mce, vatomX));
2087   at = assignNew('V', mce, Ity_I64, unop(Iop_V128to64, vatomX));
2088   at = mkPCastTo(mce, Ity_I64, at);
2089   at = assignNew('V', mce, Ity_V128, binop(Iop_SetV128lo64, vatomX, at));
2090   return at;
2091}
2092
2093/* --- --- ... and ... 32Fx2 versions of the same --- --- */
2094
2095static
2096IRAtom* binary32Fx2 ( MCEnv* mce, IRAtom* vatomX, IRAtom* vatomY )
2097{
2098   IRAtom* at;
2099   tl_assert(isShadowAtom(mce, vatomX));
2100   tl_assert(isShadowAtom(mce, vatomY));
2101   at = mkUifU64(mce, vatomX, vatomY);
2102   at = assignNew('V', mce, Ity_I64, mkPCast32x2(mce, at));
2103   return at;
2104}
2105
2106static
2107IRAtom* unary32Fx2 ( MCEnv* mce, IRAtom* vatomX )
2108{
2109   IRAtom* at;
2110   tl_assert(isShadowAtom(mce, vatomX));
2111   at = assignNew('V', mce, Ity_I64, mkPCast32x2(mce, vatomX));
2112   return at;
2113}
2114
2115/* --- ... and ... 64Fx4 versions of the same ... --- */
2116
2117static
2118IRAtom* binary64Fx4 ( MCEnv* mce, IRAtom* vatomX, IRAtom* vatomY )
2119{
2120   IRAtom* at;
2121   tl_assert(isShadowAtom(mce, vatomX));
2122   tl_assert(isShadowAtom(mce, vatomY));
2123   at = mkUifUV256(mce, vatomX, vatomY);
2124   at = assignNew('V', mce, Ity_V256, mkPCast64x4(mce, at));
2125   return at;
2126}
2127
2128static
2129IRAtom* unary64Fx4 ( MCEnv* mce, IRAtom* vatomX )
2130{
2131   IRAtom* at;
2132   tl_assert(isShadowAtom(mce, vatomX));
2133   at = assignNew('V', mce, Ity_V256, mkPCast64x4(mce, vatomX));
2134   return at;
2135}
2136
2137/* --- ... and ... 32Fx8 versions of the same ... --- */
2138
2139static
2140IRAtom* binary32Fx8 ( MCEnv* mce, IRAtom* vatomX, IRAtom* vatomY )
2141{
2142   IRAtom* at;
2143   tl_assert(isShadowAtom(mce, vatomX));
2144   tl_assert(isShadowAtom(mce, vatomY));
2145   at = mkUifUV256(mce, vatomX, vatomY);
2146   at = assignNew('V', mce, Ity_V256, mkPCast32x8(mce, at));
2147   return at;
2148}
2149
2150static
2151IRAtom* unary32Fx8 ( MCEnv* mce, IRAtom* vatomX )
2152{
2153   IRAtom* at;
2154   tl_assert(isShadowAtom(mce, vatomX));
2155   at = assignNew('V', mce, Ity_V256, mkPCast32x8(mce, vatomX));
2156   return at;
2157}
2158
2159/* --- --- Vector saturated narrowing --- --- */
2160
2161/* We used to do something very clever here, but on closer inspection
2162   (2011-Jun-15), and in particular bug #279698, it turns out to be
2163   wrong.  Part of the problem came from the fact that for a long
2164   time, the IR primops to do with saturated narrowing were
2165   underspecified and managed to confuse multiple cases which needed
2166   to be separate: the op names had a signedness qualifier, but in
2167   fact the source and destination signednesses needed to be specified
2168   independently, so the op names really need two independent
2169   signedness specifiers.
2170
2171   As of 2011-Jun-15 (ish) the underspecification was sorted out
2172   properly.  The incorrect instrumentation remained, though.  That
2173   has now (2011-Oct-22) been fixed.
2174
2175   What we now do is simple:
2176
2177   Let the original narrowing op be QNarrowBinXtoYxZ, where Z is a
2178   number of lanes, X is the source lane width and signedness, and Y
2179   is the destination lane width and signedness.  In all cases the
2180   destination lane width is half the source lane width, so the names
2181   have a bit of redundancy, but are at least easy to read.
2182
2183   For example, Iop_QNarrowBin32Sto16Ux8 narrows 8 lanes of signed 32s
2184   to unsigned 16s.
2185
2186   Let Vanilla(OP) be a function that takes OP, one of these
2187   saturating narrowing ops, and produces the same "shaped" narrowing
2188   op which is not saturating, but merely dumps the most significant
2189   bits.  "same shape" means that the lane numbers and widths are the
2190   same as with OP.
2191
2192   For example, Vanilla(Iop_QNarrowBin32Sto16Ux8)
2193                  = Iop_NarrowBin32to16x8,
2194   that is, narrow 8 lanes of 32 bits to 8 lanes of 16 bits, by
2195   dumping the top half of each lane.
2196
2197   So, with that in place, the scheme is simple: it is easy to
2198   pessimise each lane individually and then apply Vanilla(OP) so as
2199   to get the result in the right "shape".  If the original OP is
2200   QNarrowBinXtoYxZ then we produce
2201
2202   Vanilla(OP)( PCast-X-to-X-x-Z(vatom1), PCast-X-to-X-x-Z(vatom2) )
2203
2204   or for the case when OP is unary (Iop_QNarrowUn*)
2205
2206   Vanilla(OP)( PCast-X-to-X-x-Z(vatom) )
2207*/
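/* Concrete instance (illustrative): for OP = Iop_QNarrowBin32Sto16Ux8
   the binary scheme below therefore generates

      NarrowBin32to16x8( CmpNEZ32x4(vatom1), CmpNEZ32x4(vatom2) )

   ie. each 32-bit source lane is first pessimised to all-0s or
   all-1s, and the vanilla narrowing then simply keeps the (identical)
   bottom halves of those lanes. */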
2208static
2209IROp vanillaNarrowingOpOfShape ( IROp qnarrowOp )
2210{
2211   switch (qnarrowOp) {
2212      /* Binary: (128, 128) -> 128 */
2213      case Iop_QNarrowBin16Sto8Ux16:
2214      case Iop_QNarrowBin16Sto8Sx16:
2215      case Iop_QNarrowBin16Uto8Ux16:
2216         return Iop_NarrowBin16to8x16;
2217      case Iop_QNarrowBin32Sto16Ux8:
2218      case Iop_QNarrowBin32Sto16Sx8:
2219      case Iop_QNarrowBin32Uto16Ux8:
2220         return Iop_NarrowBin32to16x8;
2221      /* Binary: (64, 64) -> 64 */
2222      case Iop_QNarrowBin32Sto16Sx4:
2223         return Iop_NarrowBin32to16x4;
2224      case Iop_QNarrowBin16Sto8Ux8:
2225      case Iop_QNarrowBin16Sto8Sx8:
2226         return Iop_NarrowBin16to8x8;
2227      /* Unary: 128 -> 64 */
2228      case Iop_QNarrowUn64Uto32Ux2:
2229      case Iop_QNarrowUn64Sto32Sx2:
2230      case Iop_QNarrowUn64Sto32Ux2:
2231         return Iop_NarrowUn64to32x2;
2232      case Iop_QNarrowUn32Uto16Ux4:
2233      case Iop_QNarrowUn32Sto16Sx4:
2234      case Iop_QNarrowUn32Sto16Ux4:
2235         return Iop_NarrowUn32to16x4;
2236      case Iop_QNarrowUn16Uto8Ux8:
2237      case Iop_QNarrowUn16Sto8Sx8:
2238      case Iop_QNarrowUn16Sto8Ux8:
2239         return Iop_NarrowUn16to8x8;
2240      default:
2241         ppIROp(qnarrowOp);
2242         VG_(tool_panic)("vanillaNarrowOpOfShape");
2243   }
2244}
2245
2246static
2247IRAtom* vectorNarrowBinV128 ( MCEnv* mce, IROp narrow_op,
2248                              IRAtom* vatom1, IRAtom* vatom2)
2249{
2250   IRAtom *at1, *at2, *at3;
2251   IRAtom* (*pcast)( MCEnv*, IRAtom* );
2252   switch (narrow_op) {
2253      case Iop_QNarrowBin32Sto16Sx8: pcast = mkPCast32x4; break;
2254      case Iop_QNarrowBin32Uto16Ux8: pcast = mkPCast32x4; break;
2255      case Iop_QNarrowBin32Sto16Ux8: pcast = mkPCast32x4; break;
2256      case Iop_QNarrowBin16Sto8Sx16: pcast = mkPCast16x8; break;
2257      case Iop_QNarrowBin16Uto8Ux16: pcast = mkPCast16x8; break;
2258      case Iop_QNarrowBin16Sto8Ux16: pcast = mkPCast16x8; break;
2259      default: VG_(tool_panic)("vectorNarrowBinV128");
2260   }
2261   IROp vanilla_narrow = vanillaNarrowingOpOfShape(narrow_op);
2262   tl_assert(isShadowAtom(mce,vatom1));
2263   tl_assert(isShadowAtom(mce,vatom2));
2264   at1 = assignNew('V', mce, Ity_V128, pcast(mce, vatom1));
2265   at2 = assignNew('V', mce, Ity_V128, pcast(mce, vatom2));
2266   at3 = assignNew('V', mce, Ity_V128, binop(vanilla_narrow, at1, at2));
2267   return at3;
2268}
2269
2270static
2271IRAtom* vectorNarrowBin64 ( MCEnv* mce, IROp narrow_op,
2272                            IRAtom* vatom1, IRAtom* vatom2)
2273{
2274   IRAtom *at1, *at2, *at3;
2275   IRAtom* (*pcast)( MCEnv*, IRAtom* );
2276   switch (narrow_op) {
2277      case Iop_QNarrowBin32Sto16Sx4: pcast = mkPCast32x2; break;
2278      case Iop_QNarrowBin16Sto8Sx8:  pcast = mkPCast16x4; break;
2279      case Iop_QNarrowBin16Sto8Ux8:  pcast = mkPCast16x4; break;
2280      default: VG_(tool_panic)("vectorNarrowBin64");
2281   }
2282   IROp vanilla_narrow = vanillaNarrowingOpOfShape(narrow_op);
2283   tl_assert(isShadowAtom(mce,vatom1));
2284   tl_assert(isShadowAtom(mce,vatom2));
2285   at1 = assignNew('V', mce, Ity_I64, pcast(mce, vatom1));
2286   at2 = assignNew('V', mce, Ity_I64, pcast(mce, vatom2));
2287   at3 = assignNew('V', mce, Ity_I64, binop(vanilla_narrow, at1, at2));
2288   return at3;
2289}
2290
2291static
2292IRAtom* vectorNarrowUnV128 ( MCEnv* mce, IROp narrow_op,
2293                             IRAtom* vatom1)
2294{
2295   IRAtom *at1, *at2;
2296   IRAtom* (*pcast)( MCEnv*, IRAtom* );
2297   tl_assert(isShadowAtom(mce,vatom1));
2298   /* For vanilla narrowing (non-saturating), we can just apply
2299      the op directly to the V bits. */
2300   switch (narrow_op) {
2301      case Iop_NarrowUn16to8x8:
2302      case Iop_NarrowUn32to16x4:
2303      case Iop_NarrowUn64to32x2:
2304         at1 = assignNew('V', mce, Ity_I64, unop(narrow_op, vatom1));
2305         return at1;
2306      default:
2307         break; /* Do Plan B */
2308   }
2309   /* Plan B: for ops that involve a saturation operation on the args,
2310      we must PCast before the vanilla narrow. */
2311   switch (narrow_op) {
2312      case Iop_QNarrowUn16Sto8Sx8:  pcast = mkPCast16x8; break;
2313      case Iop_QNarrowUn16Sto8Ux8:  pcast = mkPCast16x8; break;
2314      case Iop_QNarrowUn16Uto8Ux8:  pcast = mkPCast16x8; break;
2315      case Iop_QNarrowUn32Sto16Sx4: pcast = mkPCast32x4; break;
2316      case Iop_QNarrowUn32Sto16Ux4: pcast = mkPCast32x4; break;
2317      case Iop_QNarrowUn32Uto16Ux4: pcast = mkPCast32x4; break;
2318      case Iop_QNarrowUn64Sto32Sx2: pcast = mkPCast64x2; break;
2319      case Iop_QNarrowUn64Sto32Ux2: pcast = mkPCast64x2; break;
2320      case Iop_QNarrowUn64Uto32Ux2: pcast = mkPCast64x2; break;
2321      default: VG_(tool_panic)("vectorNarrowUnV128");
2322   }
2323   IROp vanilla_narrow = vanillaNarrowingOpOfShape(narrow_op);
2324   at1 = assignNew('V', mce, Ity_V128, pcast(mce, vatom1));
2325   at2 = assignNew('V', mce, Ity_I64, unop(vanilla_narrow, at1));
2326   return at2;
2327}
2328
2329static
2330IRAtom* vectorWidenI64 ( MCEnv* mce, IROp longen_op,
2331                         IRAtom* vatom1)
2332{
2333   IRAtom *at1, *at2;
2334   IRAtom* (*pcast)( MCEnv*, IRAtom* );
2335   switch (longen_op) {
2336      case Iop_Widen8Uto16x8:  pcast = mkPCast16x8; break;
2337      case Iop_Widen8Sto16x8:  pcast = mkPCast16x8; break;
2338      case Iop_Widen16Uto32x4: pcast = mkPCast32x4; break;
2339      case Iop_Widen16Sto32x4: pcast = mkPCast32x4; break;
2340      case Iop_Widen32Uto64x2: pcast = mkPCast64x2; break;
2341      case Iop_Widen32Sto64x2: pcast = mkPCast64x2; break;
2342      default: VG_(tool_panic)("vectorWidenI64");
2343   }
2344   tl_assert(isShadowAtom(mce,vatom1));
2345   at1 = assignNew('V', mce, Ity_V128, unop(longen_op, vatom1));
2346   at2 = assignNew('V', mce, Ity_V128, pcast(mce, at1));
2347   return at2;
2348}
2349
2350
2351/* --- --- Vector integer arithmetic --- --- */
2352
2353/* Simple ... UifU the args and per-lane pessimise the results. */
2354
2355/* --- V128-bit versions --- */
2356
2357static
2358IRAtom* binary8Ix16 ( MCEnv* mce, IRAtom* vatom1, IRAtom* vatom2 )
2359{
2360   IRAtom* at;
2361   at = mkUifUV128(mce, vatom1, vatom2);
2362   at = mkPCast8x16(mce, at);
2363   return at;
2364}
2365
2366static
2367IRAtom* binary16Ix8 ( MCEnv* mce, IRAtom* vatom1, IRAtom* vatom2 )
2368{
2369   IRAtom* at;
2370   at = mkUifUV128(mce, vatom1, vatom2);
2371   at = mkPCast16x8(mce, at);
2372   return at;
2373}
2374
2375static
2376IRAtom* binary32Ix4 ( MCEnv* mce, IRAtom* vatom1, IRAtom* vatom2 )
2377{
2378   IRAtom* at;
2379   at = mkUifUV128(mce, vatom1, vatom2);
2380   at = mkPCast32x4(mce, at);
2381   return at;
2382}
2383
2384static
2385IRAtom* binary64Ix2 ( MCEnv* mce, IRAtom* vatom1, IRAtom* vatom2 )
2386{
2387   IRAtom* at;
2388   at = mkUifUV128(mce, vatom1, vatom2);
2389   at = mkPCast64x2(mce, at);
2390   return at;
2391}
2392
2393/* --- 64-bit versions --- */
2394
2395static
2396IRAtom* binary8Ix8 ( MCEnv* mce, IRAtom* vatom1, IRAtom* vatom2 )
2397{
2398   IRAtom* at;
2399   at = mkUifU64(mce, vatom1, vatom2);
2400   at = mkPCast8x8(mce, at);
2401   return at;
2402}
2403
2404static
2405IRAtom* binary16Ix4 ( MCEnv* mce, IRAtom* vatom1, IRAtom* vatom2 )
2406{
2407   IRAtom* at;
2408   at = mkUifU64(mce, vatom1, vatom2);
2409   at = mkPCast16x4(mce, at);
2410   return at;
2411}
2412
2413static
2414IRAtom* binary32Ix2 ( MCEnv* mce, IRAtom* vatom1, IRAtom* vatom2 )
2415{
2416   IRAtom* at;
2417   at = mkUifU64(mce, vatom1, vatom2);
2418   at = mkPCast32x2(mce, at);
2419   return at;
2420}
2421
2422static
2423IRAtom* binary64Ix1 ( MCEnv* mce, IRAtom* vatom1, IRAtom* vatom2 )
2424{
2425   IRAtom* at;
2426   at = mkUifU64(mce, vatom1, vatom2);
2427   at = mkPCastTo(mce, Ity_I64, at);
2428   return at;
2429}
2430
2431/* --- 32-bit versions --- */
2432
2433static
2434IRAtom* binary8Ix4 ( MCEnv* mce, IRAtom* vatom1, IRAtom* vatom2 )
2435{
2436   IRAtom* at;
2437   at = mkUifU32(mce, vatom1, vatom2);
2438   at = mkPCast8x4(mce, at);
2439   return at;
2440}
2441
2442static
2443IRAtom* binary16Ix2 ( MCEnv* mce, IRAtom* vatom1, IRAtom* vatom2 )
2444{
2445   IRAtom* at;
2446   at = mkUifU32(mce, vatom1, vatom2);
2447   at = mkPCast16x2(mce, at);
2448   return at;
2449}
2450
2451
2452/*------------------------------------------------------------*/
2453/*--- Generate shadow values from all kinds of IRExprs.    ---*/
2454/*------------------------------------------------------------*/
2455
2456static
2457IRAtom* expr2vbits_Qop ( MCEnv* mce,
2458                         IROp op,
2459                         IRAtom* atom1, IRAtom* atom2,
2460                         IRAtom* atom3, IRAtom* atom4 )
2461{
2462   IRAtom* vatom1 = expr2vbits( mce, atom1 );
2463   IRAtom* vatom2 = expr2vbits( mce, atom2 );
2464   IRAtom* vatom3 = expr2vbits( mce, atom3 );
2465   IRAtom* vatom4 = expr2vbits( mce, atom4 );
2466
2467   tl_assert(isOriginalAtom(mce,atom1));
2468   tl_assert(isOriginalAtom(mce,atom2));
2469   tl_assert(isOriginalAtom(mce,atom3));
2470   tl_assert(isOriginalAtom(mce,atom4));
2471   tl_assert(isShadowAtom(mce,vatom1));
2472   tl_assert(isShadowAtom(mce,vatom2));
2473   tl_assert(isShadowAtom(mce,vatom3));
2474   tl_assert(isShadowAtom(mce,vatom4));
2475   tl_assert(sameKindedAtoms(atom1,vatom1));
2476   tl_assert(sameKindedAtoms(atom2,vatom2));
2477   tl_assert(sameKindedAtoms(atom3,vatom3));
2478   tl_assert(sameKindedAtoms(atom4,vatom4));
2479   switch (op) {
2480      case Iop_MAddF64:
2481      case Iop_MAddF64r32:
2482      case Iop_MSubF64:
2483      case Iop_MSubF64r32:
2484         /* I32(rm) x F64 x F64 x F64 -> F64 */
2485         return mkLazy4(mce, Ity_I64, vatom1, vatom2, vatom3, vatom4);
2486
2487      case Iop_MAddF32:
2488      case Iop_MSubF32:
2489         /* I32(rm) x F32 x F32 x F32 -> F32 */
2490         return mkLazy4(mce, Ity_I32, vatom1, vatom2, vatom3, vatom4);
2491
2492      /* V256-bit data-steering */
2493      case Iop_64x4toV256:
2494         return assignNew('V', mce, Ity_V256,
2495                          IRExpr_Qop(op, vatom1, vatom2, vatom3, vatom4));
2496
2497      default:
2498         ppIROp(op);
2499         VG_(tool_panic)("memcheck:expr2vbits_Qop");
2500   }
2501}
2502
2503
2504static
2505IRAtom* expr2vbits_Triop ( MCEnv* mce,
2506                           IROp op,
2507                           IRAtom* atom1, IRAtom* atom2, IRAtom* atom3 )
2508{
2509   IRAtom* vatom1 = expr2vbits( mce, atom1 );
2510   IRAtom* vatom2 = expr2vbits( mce, atom2 );
2511   IRAtom* vatom3 = expr2vbits( mce, atom3 );
2512
2513   tl_assert(isOriginalAtom(mce,atom1));
2514   tl_assert(isOriginalAtom(mce,atom2));
2515   tl_assert(isOriginalAtom(mce,atom3));
2516   tl_assert(isShadowAtom(mce,vatom1));
2517   tl_assert(isShadowAtom(mce,vatom2));
2518   tl_assert(isShadowAtom(mce,vatom3));
2519   tl_assert(sameKindedAtoms(atom1,vatom1));
2520   tl_assert(sameKindedAtoms(atom2,vatom2));
2521   tl_assert(sameKindedAtoms(atom3,vatom3));
2522   switch (op) {
2523      case Iop_AddF128:
2524      case Iop_AddD128:
2525      case Iop_SubF128:
2526      case Iop_SubD128:
2527      case Iop_MulF128:
2528      case Iop_MulD128:
2529      case Iop_DivF128:
2530      case Iop_DivD128:
2531      case Iop_QuantizeD128:
2532         /* I32(rm) x F128/D128 x F128/D128 -> F128/D128 */
2533         return mkLazy3(mce, Ity_I128, vatom1, vatom2, vatom3);
2534      case Iop_AddF64:
2535      case Iop_AddD64:
2536      case Iop_AddF64r32:
2537      case Iop_SubF64:
2538      case Iop_SubD64:
2539      case Iop_SubF64r32:
2540      case Iop_MulF64:
2541      case Iop_MulD64:
2542      case Iop_MulF64r32:
2543      case Iop_DivF64:
2544      case Iop_DivD64:
2545      case Iop_DivF64r32:
2546      case Iop_ScaleF64:
2547      case Iop_Yl2xF64:
2548      case Iop_Yl2xp1F64:
2549      case Iop_AtanF64:
2550      case Iop_PRemF64:
2551      case Iop_PRem1F64:
2552      case Iop_QuantizeD64:
2553         /* I32(rm) x F64/D64 x F64/D64 -> F64/D64 */
2554         return mkLazy3(mce, Ity_I64, vatom1, vatom2, vatom3);
2555      case Iop_PRemC3210F64:
2556      case Iop_PRem1C3210F64:
2557         /* I32(rm) x F64 x F64 -> I32 */
2558         return mkLazy3(mce, Ity_I32, vatom1, vatom2, vatom3);
2559      case Iop_AddF32:
2560      case Iop_SubF32:
2561      case Iop_MulF32:
2562      case Iop_DivF32:
2563         /* I32(rm) x F32 x F32 -> F32 */
2564         return mkLazy3(mce, Ity_I32, vatom1, vatom2, vatom3);
2565      case Iop_SignificanceRoundD64:
2566         /* IRRoundingModeDFP(I32) x I8 x D64 -> D64 */
2567         return mkLazy3(mce, Ity_I64, vatom1, vatom2, vatom3);
2568      case Iop_SignificanceRoundD128:
2569         /* IRRoundingModeDFP(I32) x I8 x D128 -> D128 */
2570         return mkLazy3(mce, Ity_I128, vatom1, vatom2, vatom3);
2571      case Iop_ExtractV128:
2572         complainIfUndefined(mce, atom3, NULL);
2573         return assignNew('V', mce, Ity_V128, triop(op, vatom1, vatom2, atom3));
2574      case Iop_Extract64:
2575         complainIfUndefined(mce, atom3, NULL);
2576         return assignNew('V', mce, Ity_I64, triop(op, vatom1, vatom2, atom3));
2577      case Iop_SetElem8x8:
2578      case Iop_SetElem16x4:
2579      case Iop_SetElem32x2:
2580         complainIfUndefined(mce, atom2, NULL);
2581         return assignNew('V', mce, Ity_I64, triop(op, vatom1, atom2, vatom3));
2582      default:
2583         ppIROp(op);
2584         VG_(tool_panic)("memcheck:expr2vbits_Triop");
2585   }
2586}
2587
2588
2589static
2590IRAtom* expr2vbits_Binop ( MCEnv* mce,
2591                           IROp op,
2592                           IRAtom* atom1, IRAtom* atom2 )
2593{
2594   IRType  and_or_ty;
2595   IRAtom* (*uifu)    (MCEnv*, IRAtom*, IRAtom*);
2596   IRAtom* (*difd)    (MCEnv*, IRAtom*, IRAtom*);
2597   IRAtom* (*improve) (MCEnv*, IRAtom*, IRAtom*);
2598
2599   IRAtom* vatom1 = expr2vbits( mce, atom1 );
2600   IRAtom* vatom2 = expr2vbits( mce, atom2 );
2601
2602   tl_assert(isOriginalAtom(mce,atom1));
2603   tl_assert(isOriginalAtom(mce,atom2));
2604   tl_assert(isShadowAtom(mce,vatom1));
2605   tl_assert(isShadowAtom(mce,vatom2));
2606   tl_assert(sameKindedAtoms(atom1,vatom1));
2607   tl_assert(sameKindedAtoms(atom2,vatom2));
2608   switch (op) {
2609
2610      /* 32-bit SIMD */
2611
2612      case Iop_Add16x2:
2613      case Iop_HAdd16Ux2:
2614      case Iop_HAdd16Sx2:
2615      case Iop_Sub16x2:
2616      case Iop_HSub16Ux2:
2617      case Iop_HSub16Sx2:
2618      case Iop_QAdd16Sx2:
2619      case Iop_QSub16Sx2:
2620      case Iop_QSub16Ux2:
2621         return binary16Ix2(mce, vatom1, vatom2);
2622
2623      case Iop_Add8x4:
2624      case Iop_HAdd8Ux4:
2625      case Iop_HAdd8Sx4:
2626      case Iop_Sub8x4:
2627      case Iop_HSub8Ux4:
2628      case Iop_HSub8Sx4:
2629      case Iop_QSub8Ux4:
2630      case Iop_QAdd8Ux4:
2631      case Iop_QSub8Sx4:
2632      case Iop_QAdd8Sx4:
2633         return binary8Ix4(mce, vatom1, vatom2);
2634
2635      /* 64-bit SIMD */
2636
2637      case Iop_ShrN8x8:
2638      case Iop_ShrN16x4:
2639      case Iop_ShrN32x2:
2640      case Iop_SarN8x8:
2641      case Iop_SarN16x4:
2642      case Iop_SarN32x2:
2643      case Iop_ShlN16x4:
2644      case Iop_ShlN32x2:
2645      case Iop_ShlN8x8:
2646         /* Same scheme as with all other shifts. */
2647         complainIfUndefined(mce, atom2, NULL);
2648         return assignNew('V', mce, Ity_I64, binop(op, vatom1, atom2));
2649
2650      case Iop_QNarrowBin32Sto16Sx4:
2651      case Iop_QNarrowBin16Sto8Sx8:
2652      case Iop_QNarrowBin16Sto8Ux8:
2653         return vectorNarrowBin64(mce, op, vatom1, vatom2);
2654
2655      case Iop_Min8Ux8:
2656      case Iop_Min8Sx8:
2657      case Iop_Max8Ux8:
2658      case Iop_Max8Sx8:
2659      case Iop_Avg8Ux8:
2660      case Iop_QSub8Sx8:
2661      case Iop_QSub8Ux8:
2662      case Iop_Sub8x8:
2663      case Iop_CmpGT8Sx8:
2664      case Iop_CmpGT8Ux8:
2665      case Iop_CmpEQ8x8:
2666      case Iop_QAdd8Sx8:
2667      case Iop_QAdd8Ux8:
2668      case Iop_QSal8x8:
2669      case Iop_QShl8x8:
2670      case Iop_Add8x8:
2671      case Iop_Mul8x8:
2672      case Iop_PolynomialMul8x8:
2673         return binary8Ix8(mce, vatom1, vatom2);
2674
2675      case Iop_Min16Sx4:
2676      case Iop_Min16Ux4:
2677      case Iop_Max16Sx4:
2678      case Iop_Max16Ux4:
2679      case Iop_Avg16Ux4:
2680      case Iop_QSub16Ux4:
2681      case Iop_QSub16Sx4:
2682      case Iop_Sub16x4:
2683      case Iop_Mul16x4:
2684      case Iop_MulHi16Sx4:
2685      case Iop_MulHi16Ux4:
2686      case Iop_CmpGT16Sx4:
2687      case Iop_CmpGT16Ux4:
2688      case Iop_CmpEQ16x4:
2689      case Iop_QAdd16Sx4:
2690      case Iop_QAdd16Ux4:
2691      case Iop_QSal16x4:
2692      case Iop_QShl16x4:
2693      case Iop_Add16x4:
2694      case Iop_QDMulHi16Sx4:
2695      case Iop_QRDMulHi16Sx4:
2696         return binary16Ix4(mce, vatom1, vatom2);
2697
2698      case Iop_Sub32x2:
2699      case Iop_Mul32x2:
2700      case Iop_Max32Sx2:
2701      case Iop_Max32Ux2:
2702      case Iop_Min32Sx2:
2703      case Iop_Min32Ux2:
2704      case Iop_CmpGT32Sx2:
2705      case Iop_CmpGT32Ux2:
2706      case Iop_CmpEQ32x2:
2707      case Iop_Add32x2:
2708      case Iop_QAdd32Ux2:
2709      case Iop_QAdd32Sx2:
2710      case Iop_QSub32Ux2:
2711      case Iop_QSub32Sx2:
2712      case Iop_QSal32x2:
2713      case Iop_QShl32x2:
2714      case Iop_QDMulHi32Sx2:
2715      case Iop_QRDMulHi32Sx2:
2716         return binary32Ix2(mce, vatom1, vatom2);
2717
2718      case Iop_QSub64Ux1:
2719      case Iop_QSub64Sx1:
2720      case Iop_QAdd64Ux1:
2721      case Iop_QAdd64Sx1:
2722      case Iop_QSal64x1:
2723      case Iop_QShl64x1:
2724      case Iop_Sal64x1:
2725         return binary64Ix1(mce, vatom1, vatom2);
2726
2727      case Iop_QShlN8Sx8:
2728      case Iop_QShlN8x8:
2729      case Iop_QSalN8x8:
2730         complainIfUndefined(mce, atom2, NULL);
2731         return mkPCast8x8(mce, vatom1);
2732
2733      case Iop_QShlN16Sx4:
2734      case Iop_QShlN16x4:
2735      case Iop_QSalN16x4:
2736         complainIfUndefined(mce, atom2, NULL);
2737         return mkPCast16x4(mce, vatom1);
2738
2739      case Iop_QShlN32Sx2:
2740      case Iop_QShlN32x2:
2741      case Iop_QSalN32x2:
2742         complainIfUndefined(mce, atom2, NULL);
2743         return mkPCast32x2(mce, vatom1);
2744
2745      case Iop_QShlN64Sx1:
2746      case Iop_QShlN64x1:
2747      case Iop_QSalN64x1:
2748         complainIfUndefined(mce, atom2, NULL);
2749         return mkPCastTo(mce, Ity_I64, vatom1);
2750
2751      case Iop_PwMax32Sx2:
2752      case Iop_PwMax32Ux2:
2753      case Iop_PwMin32Sx2:
2754      case Iop_PwMin32Ux2:
2755      case Iop_PwMax32Fx2:
2756      case Iop_PwMin32Fx2:
2757         return assignNew('V', mce, Ity_I64,
2758                          binop(Iop_PwMax32Ux2,
2759                                mkPCast32x2(mce, vatom1),
2760                                mkPCast32x2(mce, vatom2)));
2761
2762      case Iop_PwMax16Sx4:
2763      case Iop_PwMax16Ux4:
2764      case Iop_PwMin16Sx4:
2765      case Iop_PwMin16Ux4:
2766         return assignNew('V', mce, Ity_I64,
2767                          binop(Iop_PwMax16Ux4,
2768                                mkPCast16x4(mce, vatom1),
2769                                mkPCast16x4(mce, vatom2)));
2770
2771      case Iop_PwMax8Sx8:
2772      case Iop_PwMax8Ux8:
2773      case Iop_PwMin8Sx8:
2774      case Iop_PwMin8Ux8:
2775         return assignNew('V', mce, Ity_I64,
2776                          binop(Iop_PwMax8Ux8,
2777                                mkPCast8x8(mce, vatom1),
2778                                mkPCast8x8(mce, vatom2)));
2779
2780      case Iop_PwAdd32x2:
2781      case Iop_PwAdd32Fx2:
2782         return mkPCast32x2(mce,
2783               assignNew('V', mce, Ity_I64,
2784                         binop(Iop_PwAdd32x2,
2785                               mkPCast32x2(mce, vatom1),
2786                               mkPCast32x2(mce, vatom2))));
2787
2788      case Iop_PwAdd16x4:
2789         return mkPCast16x4(mce,
2790               assignNew('V', mce, Ity_I64,
2791                         binop(op, mkPCast16x4(mce, vatom1),
2792                                   mkPCast16x4(mce, vatom2))));
2793
2794      case Iop_PwAdd8x8:
2795         return mkPCast8x8(mce,
2796               assignNew('V', mce, Ity_I64,
2797                         binop(op, mkPCast8x8(mce, vatom1),
2798                                   mkPCast8x8(mce, vatom2))));
2799
2800      case Iop_Shl8x8:
2801      case Iop_Shr8x8:
2802      case Iop_Sar8x8:
2803      case Iop_Sal8x8:
2804         return mkUifU64(mce,
2805                   assignNew('V', mce, Ity_I64, binop(op, vatom1, atom2)),
2806                   mkPCast8x8(mce,vatom2)
2807                );
2808
2809      case Iop_Shl16x4:
2810      case Iop_Shr16x4:
2811      case Iop_Sar16x4:
2812      case Iop_Sal16x4:
2813         return mkUifU64(mce,
2814                   assignNew('V', mce, Ity_I64, binop(op, vatom1, atom2)),
2815                   mkPCast16x4(mce,vatom2)
2816                );
2817
2818      case Iop_Shl32x2:
2819      case Iop_Shr32x2:
2820      case Iop_Sar32x2:
2821      case Iop_Sal32x2:
2822         return mkUifU64(mce,
2823                   assignNew('V', mce, Ity_I64, binop(op, vatom1, atom2)),
2824                   mkPCast32x2(mce,vatom2)
2825                );
2826
2827      /* 64-bit data-steering */
2828      case Iop_InterleaveLO32x2:
2829      case Iop_InterleaveLO16x4:
2830      case Iop_InterleaveLO8x8:
2831      case Iop_InterleaveHI32x2:
2832      case Iop_InterleaveHI16x4:
2833      case Iop_InterleaveHI8x8:
2834      case Iop_CatOddLanes8x8:
2835      case Iop_CatEvenLanes8x8:
2836      case Iop_CatOddLanes16x4:
2837      case Iop_CatEvenLanes16x4:
2838      case Iop_InterleaveOddLanes8x8:
2839      case Iop_InterleaveEvenLanes8x8:
2840      case Iop_InterleaveOddLanes16x4:
2841      case Iop_InterleaveEvenLanes16x4:
2842         return assignNew('V', mce, Ity_I64, binop(op, vatom1, vatom2));
2843
2844      case Iop_GetElem8x8:
2845         complainIfUndefined(mce, atom2, NULL);
2846         return assignNew('V', mce, Ity_I8, binop(op, vatom1, atom2));
2847      case Iop_GetElem16x4:
2848         complainIfUndefined(mce, atom2, NULL);
2849         return assignNew('V', mce, Ity_I16, binop(op, vatom1, atom2));
2850      case Iop_GetElem32x2:
2851         complainIfUndefined(mce, atom2, NULL);
2852         return assignNew('V', mce, Ity_I32, binop(op, vatom1, atom2));
2853
2854      /* Perm8x8: rearrange values in left arg using steering values
2855        from right arg.  So rearrange the vbits in the same way but
2856        pessimise wrt steering values. */
2857      case Iop_Perm8x8:
2858         return mkUifU64(
2859                   mce,
2860                   assignNew('V', mce, Ity_I64, binop(op, vatom1, atom2)),
2861                   mkPCast8x8(mce, vatom2)
2862                );
2863
2864      /* V128-bit SIMD */
2865
2866      case Iop_ShrN8x16:
2867      case Iop_ShrN16x8:
2868      case Iop_ShrN32x4:
2869      case Iop_ShrN64x2:
2870      case Iop_SarN8x16:
2871      case Iop_SarN16x8:
2872      case Iop_SarN32x4:
2873      case Iop_SarN64x2:
2874      case Iop_ShlN8x16:
2875      case Iop_ShlN16x8:
2876      case Iop_ShlN32x4:
2877      case Iop_ShlN64x2:
2878         /* Same scheme as with all other shifts.  Note: 22 Oct 05:
2879            this is wrong now, scalar shifts are done properly lazily.
2880            Vector shifts should be fixed too. */
2881         complainIfUndefined(mce, atom2, NULL);
2882         return assignNew('V', mce, Ity_V128, binop(op, vatom1, atom2));
2883
2884      /* V x V shifts/rotates are done using the standard lazy scheme. */
2885      case Iop_Shl8x16:
2886      case Iop_Shr8x16:
2887      case Iop_Sar8x16:
2888      case Iop_Sal8x16:
2889      case Iop_Rol8x16:
2890         return mkUifUV128(mce,
2891                   assignNew('V', mce, Ity_V128, binop(op, vatom1, atom2)),
2892                   mkPCast8x16(mce,vatom2)
2893                );
2894
2895      case Iop_Shl16x8:
2896      case Iop_Shr16x8:
2897      case Iop_Sar16x8:
2898      case Iop_Sal16x8:
2899      case Iop_Rol16x8:
2900         return mkUifUV128(mce,
2901                   assignNew('V', mce, Ity_V128, binop(op, vatom1, atom2)),
2902                   mkPCast16x8(mce,vatom2)
2903                );
2904
2905      case Iop_Shl32x4:
2906      case Iop_Shr32x4:
2907      case Iop_Sar32x4:
2908      case Iop_Sal32x4:
2909      case Iop_Rol32x4:
2910         return mkUifUV128(mce,
2911                   assignNew('V', mce, Ity_V128, binop(op, vatom1, atom2)),
2912                   mkPCast32x4(mce,vatom2)
2913                );
2914
2915      case Iop_Shl64x2:
2916      case Iop_Shr64x2:
2917      case Iop_Sar64x2:
2918      case Iop_Sal64x2:
2919         return mkUifUV128(mce,
2920                   assignNew('V', mce, Ity_V128, binop(op, vatom1, atom2)),
2921                   mkPCast64x2(mce,vatom2)
2922                );
2923
2924      case Iop_F32ToFixed32Ux4_RZ:
2925      case Iop_F32ToFixed32Sx4_RZ:
2926      case Iop_Fixed32UToF32x4_RN:
2927      case Iop_Fixed32SToF32x4_RN:
2928         complainIfUndefined(mce, atom2, NULL);
2929         return mkPCast32x4(mce, vatom1);
2930
2931      case Iop_F32ToFixed32Ux2_RZ:
2932      case Iop_F32ToFixed32Sx2_RZ:
2933      case Iop_Fixed32UToF32x2_RN:
2934      case Iop_Fixed32SToF32x2_RN:
2935         complainIfUndefined(mce, atom2, NULL);
2936         return mkPCast32x2(mce, vatom1);
2937
2938      case Iop_QSub8Ux16:
2939      case Iop_QSub8Sx16:
2940      case Iop_Sub8x16:
2941      case Iop_Min8Ux16:
2942      case Iop_Min8Sx16:
2943      case Iop_Max8Ux16:
2944      case Iop_Max8Sx16:
2945      case Iop_CmpGT8Sx16:
2946      case Iop_CmpGT8Ux16:
2947      case Iop_CmpEQ8x16:
2948      case Iop_Avg8Ux16:
2949      case Iop_Avg8Sx16:
2950      case Iop_QAdd8Ux16:
2951      case Iop_QAdd8Sx16:
2952      case Iop_QSal8x16:
2953      case Iop_QShl8x16:
2954      case Iop_Add8x16:
2955      case Iop_Mul8x16:
2956      case Iop_PolynomialMul8x16:
2957         return binary8Ix16(mce, vatom1, vatom2);
2958
2959      case Iop_QSub16Ux8:
2960      case Iop_QSub16Sx8:
2961      case Iop_Sub16x8:
2962      case Iop_Mul16x8:
2963      case Iop_MulHi16Sx8:
2964      case Iop_MulHi16Ux8:
2965      case Iop_Min16Sx8:
2966      case Iop_Min16Ux8:
2967      case Iop_Max16Sx8:
2968      case Iop_Max16Ux8:
2969      case Iop_CmpGT16Sx8:
2970      case Iop_CmpGT16Ux8:
2971      case Iop_CmpEQ16x8:
2972      case Iop_Avg16Ux8:
2973      case Iop_Avg16Sx8:
2974      case Iop_QAdd16Ux8:
2975      case Iop_QAdd16Sx8:
2976      case Iop_QSal16x8:
2977      case Iop_QShl16x8:
2978      case Iop_Add16x8:
2979      case Iop_QDMulHi16Sx8:
2980      case Iop_QRDMulHi16Sx8:
2981         return binary16Ix8(mce, vatom1, vatom2);
2982
2983      case Iop_Sub32x4:
2984      case Iop_CmpGT32Sx4:
2985      case Iop_CmpGT32Ux4:
2986      case Iop_CmpEQ32x4:
2987      case Iop_QAdd32Sx4:
2988      case Iop_QAdd32Ux4:
2989      case Iop_QSub32Sx4:
2990      case Iop_QSub32Ux4:
2991      case Iop_QSal32x4:
2992      case Iop_QShl32x4:
2993      case Iop_Avg32Ux4:
2994      case Iop_Avg32Sx4:
2995      case Iop_Add32x4:
2996      case Iop_Max32Ux4:
2997      case Iop_Max32Sx4:
2998      case Iop_Min32Ux4:
2999      case Iop_Min32Sx4:
3000      case Iop_Mul32x4:
3001      case Iop_QDMulHi32Sx4:
3002      case Iop_QRDMulHi32Sx4:
3003         return binary32Ix4(mce, vatom1, vatom2);
3004
3005      case Iop_Sub64x2:
3006      case Iop_Add64x2:
3007      case Iop_CmpEQ64x2:
3008      case Iop_CmpGT64Sx2:
3009      case Iop_QSal64x2:
3010      case Iop_QShl64x2:
3011      case Iop_QAdd64Ux2:
3012      case Iop_QAdd64Sx2:
3013      case Iop_QSub64Ux2:
3014      case Iop_QSub64Sx2:
3015         return binary64Ix2(mce, vatom1, vatom2);
3016
3017      case Iop_QNarrowBin32Sto16Sx8:
3018      case Iop_QNarrowBin32Uto16Ux8:
3019      case Iop_QNarrowBin32Sto16Ux8:
3020      case Iop_QNarrowBin16Sto8Sx16:
3021      case Iop_QNarrowBin16Uto8Ux16:
3022      case Iop_QNarrowBin16Sto8Ux16:
3023         return vectorNarrowBinV128(mce, op, vatom1, vatom2);
3024
3025      case Iop_Sub64Fx2:
3026      case Iop_Mul64Fx2:
3027      case Iop_Min64Fx2:
3028      case Iop_Max64Fx2:
3029      case Iop_Div64Fx2:
3030      case Iop_CmpLT64Fx2:
3031      case Iop_CmpLE64Fx2:
3032      case Iop_CmpEQ64Fx2:
3033      case Iop_CmpUN64Fx2:
3034      case Iop_Add64Fx2:
3035         return binary64Fx2(mce, vatom1, vatom2);
3036
3037      case Iop_Sub64F0x2:
3038      case Iop_Mul64F0x2:
3039      case Iop_Min64F0x2:
3040      case Iop_Max64F0x2:
3041      case Iop_Div64F0x2:
3042      case Iop_CmpLT64F0x2:
3043      case Iop_CmpLE64F0x2:
3044      case Iop_CmpEQ64F0x2:
3045      case Iop_CmpUN64F0x2:
3046      case Iop_Add64F0x2:
3047         return binary64F0x2(mce, vatom1, vatom2);
3048
3049      case Iop_Sub32Fx4:
3050      case Iop_Mul32Fx4:
3051      case Iop_Min32Fx4:
3052      case Iop_Max32Fx4:
3053      case Iop_Div32Fx4:
3054      case Iop_CmpLT32Fx4:
3055      case Iop_CmpLE32Fx4:
3056      case Iop_CmpEQ32Fx4:
3057      case Iop_CmpUN32Fx4:
3058      case Iop_CmpGT32Fx4:
3059      case Iop_CmpGE32Fx4:
3060      case Iop_Add32Fx4:
3061      case Iop_Recps32Fx4:
3062      case Iop_Rsqrts32Fx4:
3063         return binary32Fx4(mce, vatom1, vatom2);
3064
3065      case Iop_Sub32Fx2:
3066      case Iop_Mul32Fx2:
3067      case Iop_Min32Fx2:
3068      case Iop_Max32Fx2:
3069      case Iop_CmpEQ32Fx2:
3070      case Iop_CmpGT32Fx2:
3071      case Iop_CmpGE32Fx2:
3072      case Iop_Add32Fx2:
3073      case Iop_Recps32Fx2:
3074      case Iop_Rsqrts32Fx2:
3075         return binary32Fx2(mce, vatom1, vatom2);
3076
3077      case Iop_Sub32F0x4:
3078      case Iop_Mul32F0x4:
3079      case Iop_Min32F0x4:
3080      case Iop_Max32F0x4:
3081      case Iop_Div32F0x4:
3082      case Iop_CmpLT32F0x4:
3083      case Iop_CmpLE32F0x4:
3084      case Iop_CmpEQ32F0x4:
3085      case Iop_CmpUN32F0x4:
3086      case Iop_Add32F0x4:
3087         return binary32F0x4(mce, vatom1, vatom2);
3088
3089      case Iop_QShlN8Sx16:
3090      case Iop_QShlN8x16:
3091      case Iop_QSalN8x16:
3092         complainIfUndefined(mce, atom2, NULL);
3093         return mkPCast8x16(mce, vatom1);
3094
3095      case Iop_QShlN16Sx8:
3096      case Iop_QShlN16x8:
3097      case Iop_QSalN16x8:
3098         complainIfUndefined(mce, atom2, NULL);
3099         return mkPCast16x8(mce, vatom1);
3100
3101      case Iop_QShlN32Sx4:
3102      case Iop_QShlN32x4:
3103      case Iop_QSalN32x4:
3104         complainIfUndefined(mce, atom2, NULL);
3105         return mkPCast32x4(mce, vatom1);
3106
3107      case Iop_QShlN64Sx2:
3108      case Iop_QShlN64x2:
3109      case Iop_QSalN64x2:
3110         complainIfUndefined(mce, atom2, NULL);
3111         return mkPCast64x2(mce, vatom1);
3112
3113      case Iop_Mull32Sx2:
3114      case Iop_Mull32Ux2:
3115      case Iop_QDMulLong32Sx2:
3116         return vectorWidenI64(mce, Iop_Widen32Sto64x2,
3117                                    mkUifU64(mce, vatom1, vatom2));
3118
3119      case Iop_Mull16Sx4:
3120      case Iop_Mull16Ux4:
3121      case Iop_QDMulLong16Sx4:
3122         return vectorWidenI64(mce, Iop_Widen16Sto32x4,
3123                                    mkUifU64(mce, vatom1, vatom2));
3124
3125      case Iop_Mull8Sx8:
3126      case Iop_Mull8Ux8:
3127      case Iop_PolynomialMull8x8:
3128         return vectorWidenI64(mce, Iop_Widen8Sto16x8,
3129                                    mkUifU64(mce, vatom1, vatom2));
3130
3131      case Iop_PwAdd32x4:
3132         return mkPCast32x4(mce,
3133               assignNew('V', mce, Ity_V128, binop(op, mkPCast32x4(mce, vatom1),
3134                     mkPCast32x4(mce, vatom2))));
3135
3136      case Iop_PwAdd16x8:
3137         return mkPCast16x8(mce,
3138               assignNew('V', mce, Ity_V128, binop(op, mkPCast16x8(mce, vatom1),
3139                     mkPCast16x8(mce, vatom2))));
3140
3141      case Iop_PwAdd8x16:
3142         return mkPCast8x16(mce,
3143               assignNew('V', mce, Ity_V128, binop(op, mkPCast8x16(mce, vatom1),
3144                     mkPCast8x16(mce, vatom2))));
3145
3146      /* V128-bit data-steering */
3147      case Iop_SetV128lo32:
3148      case Iop_SetV128lo64:
3149      case Iop_64HLtoV128:
3150      case Iop_InterleaveLO64x2:
3151      case Iop_InterleaveLO32x4:
3152      case Iop_InterleaveLO16x8:
3153      case Iop_InterleaveLO8x16:
3154      case Iop_InterleaveHI64x2:
3155      case Iop_InterleaveHI32x4:
3156      case Iop_InterleaveHI16x8:
3157      case Iop_InterleaveHI8x16:
3158      case Iop_CatOddLanes8x16:
3159      case Iop_CatOddLanes16x8:
3160      case Iop_CatOddLanes32x4:
3161      case Iop_CatEvenLanes8x16:
3162      case Iop_CatEvenLanes16x8:
3163      case Iop_CatEvenLanes32x4:
3164      case Iop_InterleaveOddLanes8x16:
3165      case Iop_InterleaveOddLanes16x8:
3166      case Iop_InterleaveOddLanes32x4:
3167      case Iop_InterleaveEvenLanes8x16:
3168      case Iop_InterleaveEvenLanes16x8:
3169      case Iop_InterleaveEvenLanes32x4:
3170         return assignNew('V', mce, Ity_V128, binop(op, vatom1, vatom2));
3171
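      /* GetElem: the lane index (atom2) must itself be defined, hence
         the complaint below; the extracted lane's vbits are then
         obtained by applying the same GetElem to the vector's vbits,
         using the original (checked) index. */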
3172      case Iop_GetElem8x16:
3173         complainIfUndefined(mce, atom2, NULL);
3174         return assignNew('V', mce, Ity_I8, binop(op, vatom1, atom2));
3175      case Iop_GetElem16x8:
3176         complainIfUndefined(mce, atom2, NULL);
3177         return assignNew('V', mce, Ity_I16, binop(op, vatom1, atom2));
3178      case Iop_GetElem32x4:
3179         complainIfUndefined(mce, atom2, NULL);
3180         return assignNew('V', mce, Ity_I32, binop(op, vatom1, atom2));
3181      case Iop_GetElem64x2:
3182         complainIfUndefined(mce, atom2, NULL);
3183         return assignNew('V', mce, Ity_I64, binop(op, vatom1, atom2));
3184
3185     /* Perm8x16: rearrange values in left arg using steering values
3186        from right arg.  So rearrange the vbits in the same way but
3187        pessimise wrt steering values.  Perm32x4 ditto. */
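      /* Illustratively (a sketch of the structure built below): for
         Perm8x16(data, steer) the shadow value is
            UifU( Perm8x16(vbits(data), steer), PCast8x16(vbits(steer)) )
         so the data vbits are steered exactly like the data, and any
         result lane whose steering byte is even partially undefined is
         then forced to all-undefined by the UifU term. */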
3188      case Iop_Perm8x16:
3189         return mkUifUV128(
3190                   mce,
3191                   assignNew('V', mce, Ity_V128, binop(op, vatom1, atom2)),
3192                   mkPCast8x16(mce, vatom2)
3193                );
3194      case Iop_Perm32x4:
3195         return mkUifUV128(
3196                   mce,
3197                   assignNew('V', mce, Ity_V128, binop(op, vatom1, atom2)),
3198                   mkPCast32x4(mce, vatom2)
3199                );
3200
3201     /* These two take the lower 16-bit half of each 32-bit lane,
3202        sign/zero extend it to 32 bits, and multiply the halves
3203        together, producing a 32x4 result (and implicitly ignoring
3204        half the operand bits).  So treat it as a bunch of independent
3205        16x8 operations, but then do 32-bit left/right shifts to copy
3206        the lower-half results (which are all 0s or all 1s due to
3207        PCasting in binary16Ix8) into the upper half of each result lane. */
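      /* Worked example for one 32-bit result lane (hypothetical vbits):
         if binary16Ix8 yields lane vbits 0x0000FFFF (low 16-bit half
         undefined, high half defined), then ShlN32x4 by 16 gives
         0xFFFF0000 and the arithmetic SarN32x4 by 16 gives 0xFFFFFFFF,
         so the whole result lane is marked undefined; a fully defined
         low half (0x00000000 after the shifts) stays fully defined. */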
3208      case Iop_MullEven16Ux8:
3209      case Iop_MullEven16Sx8: {
3210         IRAtom* at;
3211         at = binary16Ix8(mce,vatom1,vatom2);
3212         at = assignNew('V', mce, Ity_V128, binop(Iop_ShlN32x4, at, mkU8(16)));
3213         at = assignNew('V', mce, Ity_V128, binop(Iop_SarN32x4, at, mkU8(16)));
3214         return at;
3215      }
3216
3217      /* Same deal as Iop_MullEven16{S,U}x8 */
3218      case Iop_MullEven8Ux16:
3219      case Iop_MullEven8Sx16: {
3220         IRAtom* at;
3221         at = binary8Ix16(mce,vatom1,vatom2);
3222         at = assignNew('V', mce, Ity_V128, binop(Iop_ShlN16x8, at, mkU8(8)));
3223         at = assignNew('V', mce, Ity_V128, binop(Iop_SarN16x8, at, mkU8(8)));
3224         return at;
3225      }
3226
3227      /* narrow 2xV128 into 1xV128, hi half from left arg, in a 2 x
3228         32x4 -> 16x8 laneage, discarding the upper half of each lane.
3229         Simply apply the same op to the V bits, since this is really
3230         no more than a data-steering operation. */
3231      case Iop_NarrowBin32to16x8:
3232      case Iop_NarrowBin16to8x16:
3233         return assignNew('V', mce, Ity_V128,
3234                                    binop(op, vatom1, vatom2));
3235
3236      case Iop_ShrV128:
3237      case Iop_ShlV128:
3238         /* Same scheme as with all other shifts.  Note: 10 Nov 05:
3239            this is wrong now, scalar shifts are done properly lazily.
3240            Vector shifts should be fixed too. */
3241         complainIfUndefined(mce, atom2, NULL);
3242         return assignNew('V', mce, Ity_V128, binop(op, vatom1, atom2));
3243
3244      /* I128-bit data-steering */
3245      case Iop_64HLto128:
3246         return assignNew('V', mce, Ity_I128, binop(op, vatom1, vatom2));
3247
3248      /* V256-bit SIMD */
3249
3250      case Iop_Add64Fx4:
3251      case Iop_Sub64Fx4:
3252      case Iop_Mul64Fx4:
3253      case Iop_Div64Fx4:
3254      case Iop_Max64Fx4:
3255      case Iop_Min64Fx4:
3256         return binary64Fx4(mce, vatom1, vatom2);
3257
3258      case Iop_Add32Fx8:
3259      case Iop_Sub32Fx8:
3260      case Iop_Mul32Fx8:
3261      case Iop_Div32Fx8:
3262      case Iop_Max32Fx8:
3263      case Iop_Min32Fx8:
3264         return binary32Fx8(mce, vatom1, vatom2);
3265
3266      /* V256-bit data-steering */
3267      case Iop_V128HLtoV256:
3268         return assignNew('V', mce, Ity_V256, binop(op, vatom1, vatom2));
3269
3270      /* Scalar floating point */
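      /* Most of the scalar FP / DFP cases below use mkLazy2: the
         result's shadow is all-defined iff both operands (including the
         rounding mode) are entirely defined, and all-undefined
         otherwise.  No attempt is made at bit-level precision for FP
         arithmetic. */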
3271
3272      case Iop_F32toI64S:
3273      case Iop_F32toI64U:
3274         /* I32(rm) x F32 -> I64 */
3275         return mkLazy2(mce, Ity_I64, vatom1, vatom2);
3276
3277      case Iop_I64StoF32:
3278         /* I32(rm) x I64 -> F32 */
3279         return mkLazy2(mce, Ity_I32, vatom1, vatom2);
3280
3281      case Iop_RoundF64toInt:
3282      case Iop_RoundF64toF32:
3283      case Iop_F64toI64S:
3284      case Iop_F64toI64U:
3285      case Iop_I64StoF64:
3286      case Iop_I64UtoF64:
3287      case Iop_SinF64:
3288      case Iop_CosF64:
3289      case Iop_TanF64:
3290      case Iop_2xm1F64:
3291      case Iop_SqrtF64:
3292         /* I32(rm) x I64/F64 -> I64/F64 */
3293         return mkLazy2(mce, Ity_I64, vatom1, vatom2);
3294
3295      case Iop_ShlD64:
3296      case Iop_ShrD64:
3297      case Iop_RoundD64toInt:
3298         /* I32(DFP rm) x D64 -> D64 */
3299         return mkLazy2(mce, Ity_I64, vatom1, vatom2);
3300
3301      case Iop_ShlD128:
3302      case Iop_ShrD128:
3303      case Iop_RoundD128toInt:
3304         /* I32(DFP rm) x D128 -> D128 */
3305         return mkLazy2(mce, Ity_I128, vatom1, vatom2);
3306
3307      case Iop_D64toI64S:
3308      case Iop_I64StoD64:
3309         /* I64(DFP rm) x I64 -> D64 */
3310         return mkLazy2(mce, Ity_I64, vatom1, vatom2);
3311
3312      case Iop_RoundF32toInt:
3313      case Iop_SqrtF32:
3314         /* I32(rm) x I32/F32 -> I32/F32 */
3315         return mkLazy2(mce, Ity_I32, vatom1, vatom2);
3316
3317      case Iop_SqrtF128:
3318         /* I32(rm) x F128 -> F128 */
3319         return mkLazy2(mce, Ity_I128, vatom1, vatom2);
3320
3321      case Iop_I32StoF32:
3322      case Iop_I32UtoF32:
3323      case Iop_F32toI32S:
3324      case Iop_F32toI32U:
3325         /* First arg is I32 (rounding mode), second is F32/I32 (data). */
3326         return mkLazy2(mce, Ity_I32, vatom1, vatom2);
3327
3328      case Iop_F128toI32S: /* IRRoundingMode(I32) x F128 -> signed I32  */
3329      case Iop_F128toI32U: /* IRRoundingMode(I32) x F128 -> unsigned I32  */
3330      case Iop_F128toF32:  /* IRRoundingMode(I32) x F128 -> F32         */
3331         return mkLazy2(mce, Ity_I32, vatom1, vatom2);
3332
3333      case Iop_F128toI64S: /* IRRoundingMode(I32) x F128 -> signed I64  */
3334      case Iop_F128toI64U: /* IRRoundingMode(I32) x F128 -> unsigned I64  */
3335      case Iop_F128toF64:  /* IRRoundingMode(I32) x F128 -> F64         */
3336      case Iop_D128toD64:  /* IRRoundingModeDFP(I64) x D128 -> D64 */
3337      case Iop_D128toI64S: /* IRRoundingModeDFP(I64) x D128 -> signed I64  */
3338         return mkLazy2(mce, Ity_I64, vatom1, vatom2);
3339
3340      case Iop_F64HLtoF128:
3341      case Iop_D64HLtoD128:
3342         return assignNew('V', mce, Ity_I128,
3343                          binop(Iop_64HLto128, vatom1, vatom2));
3344
3345      case Iop_F64toI32U:
3346      case Iop_F64toI32S:
3347      case Iop_F64toF32:
3348      case Iop_I64UtoF32:
3349         /* First arg is I32 (rounding mode), second is F64/I64 (data). */
3350         return mkLazy2(mce, Ity_I32, vatom1, vatom2);
3351
3352      case Iop_D64toD32:
3353         /* First arg is I64 (DFP rounding mode), second is D64 (data). */
3354         return mkLazy2(mce, Ity_I64, vatom1, vatom2);
3355
3356      case Iop_F64toI16S:
3357         /* First arg is I32 (rounding mode), second is F64 (data). */
3358         return mkLazy2(mce, Ity_I16, vatom1, vatom2);
3359
3360      case Iop_InsertExpD64:
3361         /*  I64 x I64 -> D64 */
3362         return mkLazy2(mce, Ity_I64, vatom1, vatom2);
3363
3364      case Iop_InsertExpD128:
3365         /*  I64 x I128 -> D128 */
3366         return mkLazy2(mce, Ity_I128, vatom1, vatom2);
3367
3368      case Iop_CmpF32:
3369      case Iop_CmpF64:
3370      case Iop_CmpF128:
3371      case Iop_CmpD64:
3372      case Iop_CmpD128:
3373         return mkLazy2(mce, Ity_I32, vatom1, vatom2);
3374
3375      /* non-FP after here */
3376
3377      case Iop_DivModU64to32:
3378      case Iop_DivModS64to32:
3379         return mkLazy2(mce, Ity_I64, vatom1, vatom2);
3380
3381      case Iop_DivModU128to64:
3382      case Iop_DivModS128to64:
3383         return mkLazy2(mce, Ity_I128, vatom1, vatom2);
3384
3385      case Iop_8HLto16:
3386         return assignNew('V', mce, Ity_I16, binop(op, vatom1, vatom2));
3387      case Iop_16HLto32:
3388         return assignNew('V', mce, Ity_I32, binop(op, vatom1, vatom2));
3389      case Iop_32HLto64:
3390         return assignNew('V', mce, Ity_I64, binop(op, vatom1, vatom2));
3391
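      /* For the widening multiply and div/mod cases below: the low half
         of the result's shadow is mkLeft of UifU of the operand vbits,
         i.e. each undefined bit is smeared towards the most significant
         end (for multiplies, an undefined bit at position i may corrupt
         any product bit at or above i); the high half is then just a
         PCast of that, so it is all-defined or all-undefined. */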
3392      case Iop_DivModS64to64:
3393      case Iop_MullS64:
3394      case Iop_MullU64: {
3395         IRAtom* vLo64 = mkLeft64(mce, mkUifU64(mce, vatom1,vatom2));
3396         IRAtom* vHi64 = mkPCastTo(mce, Ity_I64, vLo64);
3397         return assignNew('V', mce, Ity_I128,
3398                          binop(Iop_64HLto128, vHi64, vLo64));
3399      }
3400
3401      case Iop_MullS32:
3402      case Iop_MullU32: {
3403         IRAtom* vLo32 = mkLeft32(mce, mkUifU32(mce, vatom1,vatom2));
3404         IRAtom* vHi32 = mkPCastTo(mce, Ity_I32, vLo32);
3405         return assignNew('V', mce, Ity_I64,
3406                          binop(Iop_32HLto64, vHi32, vLo32));
3407      }
3408
3409      case Iop_MullS16:
3410      case Iop_MullU16: {
3411         IRAtom* vLo16 = mkLeft16(mce, mkUifU16(mce, vatom1,vatom2));
3412         IRAtom* vHi16 = mkPCastTo(mce, Ity_I16, vLo16);
3413         return assignNew('V', mce, Ity_I32,
3414                          binop(Iop_16HLto32, vHi16, vLo16));
3415      }
3416
3417      case Iop_MullS8:
3418      case Iop_MullU8: {
3419         IRAtom* vLo8 = mkLeft8(mce, mkUifU8(mce, vatom1,vatom2));
3420         IRAtom* vHi8 = mkPCastTo(mce, Ity_I8, vLo8);
3421         return assignNew('V', mce, Ity_I16, binop(Iop_8HLto16, vHi8, vLo8));
3422      }
3423
3424      case Iop_Sad8Ux4: /* maybe we could do better?  For the moment, do mkLazy2. */
3425      case Iop_DivS32:
3426      case Iop_DivU32:
3427      case Iop_DivU32E:
3428      case Iop_DivS32E:
3429      case Iop_QAdd32S: /* could probably do better */
3430      case Iop_QSub32S: /* could probably do better */
3431         return mkLazy2(mce, Ity_I32, vatom1, vatom2);
3432
3433      case Iop_DivS64:
3434      case Iop_DivU64:
3435      case Iop_DivS64E:
3436      case Iop_DivU64E:
3437         return mkLazy2(mce, Ity_I64, vatom1, vatom2);
3438
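      /* Add/Sub: when the superblock appears to contain suspicious
         ("bogus") literals -- or, for Add, when LLVM workarounds are
         enabled -- use the more precise expensiveAddSub scheme, which
         takes the actual operand values into account.  Otherwise use
         the cheap scheme: UifU the operand vbits and smear them
         leftwards with mkLeft, since carries only propagate towards
         the MSB. */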
3439      case Iop_Add32:
3440         if (mce->bogusLiterals || mce->useLLVMworkarounds)
3441            return expensiveAddSub(mce,True,Ity_I32,
3442                                   vatom1,vatom2, atom1,atom2);
3443         else
3444            goto cheap_AddSub32;
3445      case Iop_Sub32:
3446         if (mce->bogusLiterals)
3447            return expensiveAddSub(mce,False,Ity_I32,
3448                                   vatom1,vatom2, atom1,atom2);
3449         else
3450            goto cheap_AddSub32;
3451
3452      cheap_AddSub32:
3453      case Iop_Mul32:
3454         return mkLeft32(mce, mkUifU32(mce, vatom1,vatom2));
3455
3456      case Iop_CmpORD32S:
3457      case Iop_CmpORD32U:
3458      case Iop_CmpORD64S:
3459      case Iop_CmpORD64U:
3460         return doCmpORD(mce, op, vatom1,vatom2, atom1,atom2);
3461
3462      case Iop_Add64:
3463         if (mce->bogusLiterals || mce->useLLVMworkarounds)
3464            return expensiveAddSub(mce,True,Ity_I64,
3465                                   vatom1,vatom2, atom1,atom2);
3466         else
3467            goto cheap_AddSub64;
3468      case Iop_Sub64:
3469         if (mce->bogusLiterals)
3470            return expensiveAddSub(mce,False,Ity_I64,
3471                                   vatom1,vatom2, atom1,atom2);
3472         else
3473            goto cheap_AddSub64;
3474
3475      cheap_AddSub64:
3476      case Iop_Mul64:
3477         return mkLeft64(mce, mkUifU64(mce, vatom1,vatom2));
3478
3479      case Iop_Mul16:
3480      case Iop_Add16:
3481      case Iop_Sub16:
3482         return mkLeft16(mce, mkUifU16(mce, vatom1,vatom2));
3483
3484      case Iop_Mul8:
3485      case Iop_Sub8:
3486      case Iop_Add8:
3487         return mkLeft8(mce, mkUifU8(mce, vatom1,vatom2));
3488
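      /* Equality comparisons: in the presence of suspicious literals,
         use expensiveCmpEQorNE, which can report a defined result even
         when some operand bits are undefined, provided the operands'
         defined bits already differ.  The cheap scheme simply PCasts
         the UifU of the operand vbits down to a single bit. */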
3489      case Iop_CmpEQ64:
3490      case Iop_CmpNE64:
3491         if (mce->bogusLiterals)
3492            goto expensive_cmp64;
3493         else
3494            goto cheap_cmp64;
3495
3496      expensive_cmp64:
3497      case Iop_ExpCmpNE64:
3498         return expensiveCmpEQorNE(mce,Ity_I64, vatom1,vatom2, atom1,atom2 );
3499
3500      cheap_cmp64:
3501      case Iop_CmpLE64S: case Iop_CmpLE64U:
3502      case Iop_CmpLT64U: case Iop_CmpLT64S:
3503         return mkPCastTo(mce, Ity_I1, mkUifU64(mce, vatom1,vatom2));
3504
3505      case Iop_CmpEQ32:
3506      case Iop_CmpNE32:
3507         if (mce->bogusLiterals)
3508            goto expensive_cmp32;
3509         else
3510            goto cheap_cmp32;
3511
3512      expensive_cmp32:
3513      case Iop_ExpCmpNE32:
3514         return expensiveCmpEQorNE(mce,Ity_I32, vatom1,vatom2, atom1,atom2 );
3515
3516      cheap_cmp32:
3517      case Iop_CmpLE32S: case Iop_CmpLE32U:
3518      case Iop_CmpLT32U: case Iop_CmpLT32S:
3519         return mkPCastTo(mce, Ity_I1, mkUifU32(mce, vatom1,vatom2));
3520
3521      case Iop_CmpEQ16: case Iop_CmpNE16:
3522         return mkPCastTo(mce, Ity_I1, mkUifU16(mce, vatom1,vatom2));
3523
3524      case Iop_ExpCmpNE16:
3525         return expensiveCmpEQorNE(mce,Ity_I16, vatom1,vatom2, atom1,atom2 );
3526
3527      case Iop_CmpEQ8: case Iop_CmpNE8:
3528         return mkPCastTo(mce, Ity_I1, mkUifU8(mce, vatom1,vatom2));
3529
3530      case Iop_CasCmpEQ8:  case Iop_CasCmpNE8:
3531      case Iop_CasCmpEQ16: case Iop_CasCmpNE16:
3532      case Iop_CasCmpEQ32: case Iop_CasCmpNE32:
3533      case Iop_CasCmpEQ64: case Iop_CasCmpNE64:
3534         /* Just say these all produce a defined result, regardless
3535            of their arguments.  See COMMENT_ON_CasCmpEQ in this file. */
3536         return assignNew('V', mce, Ity_I1, definedOfType(Ity_I1));
3537
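      /* Scalar shifts are handled by scalarShift which, roughly, shifts
         the first operand's vbits by the original (unshadowed) shift
         amount and then UifUs in a PCast of the shift amount's vbits,
         so an undefined shift amount makes the entire result undefined. */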
3538      case Iop_Shl64: case Iop_Shr64: case Iop_Sar64:
3539         return scalarShift( mce, Ity_I64, op, vatom1,vatom2, atom1,atom2 );
3540
3541      case Iop_Shl32: case Iop_Shr32: case Iop_Sar32:
3542         return scalarShift( mce, Ity_I32, op, vatom1,vatom2, atom1,atom2 );
3543
3544      case Iop_Shl16: case Iop_Shr16: case Iop_Sar16:
3545         return scalarShift( mce, Ity_I16, op, vatom1,vatom2, atom1,atom2 );
3546
3547      case Iop_Shl8: case Iop_Shr8: case Iop_Sar8:
3548         return scalarShift( mce, Ity_I8, op, vatom1,vatom2, atom1,atom2 );
3549
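      /* And/Or.  The scheme assembled at do_And_Or below is,
         schematically,
            v(x AND y) = DifD( UifU(vx,vy),
                               DifD(improve(x,vx), improve(y,vy)) )
         where, for AND, improve(x,vx) = x | vx: a bit of that term is
         defined exactly when the operand bit is a defined 0, which
         forces the result bit regardless of the other operand.  For OR
         the improvement term instead recognises defined 1 bits.  DifD
         marks a bit as defined if either of its inputs does.  (Summary
         only; see the mkImprove* helpers for the exact expressions.) */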
3550      case Iop_AndV256:
3551         uifu = mkUifUV256; difd = mkDifDV256;
3552         and_or_ty = Ity_V256; improve = mkImproveANDV256; goto do_And_Or;
3553      case Iop_AndV128:
3554         uifu = mkUifUV128; difd = mkDifDV128;
3555         and_or_ty = Ity_V128; improve = mkImproveANDV128; goto do_And_Or;
3556      case Iop_And64:
3557         uifu = mkUifU64; difd = mkDifD64;
3558         and_or_ty = Ity_I64; improve = mkImproveAND64; goto do_And_Or;
3559      case Iop_And32:
3560         uifu = mkUifU32; difd = mkDifD32;
3561         and_or_ty = Ity_I32; improve = mkImproveAND32; goto do_And_Or;
3562      case Iop_And16:
3563         uifu = mkUifU16; difd = mkDifD16;
3564         and_or_ty = Ity_I16; improve = mkImproveAND16; goto do_And_Or;
3565      case Iop_And8:
3566         uifu = mkUifU8; difd = mkDifD8;
3567         and_or_ty = Ity_I8; improve = mkImproveAND8; goto do_And_Or;
3568
3569      case Iop_OrV256:
3570         uifu = mkUifUV256; difd = mkDifDV256;
3571         and_or_ty = Ity_V256; improve = mkImproveORV256; goto do_And_Or;
3572      case Iop_OrV128:
3573         uifu = mkUifUV128; difd = mkDifDV128;
3574         and_or_ty = Ity_V128; improve = mkImproveORV128; goto do_And_Or;
3575      case Iop_Or64:
3576         uifu = mkUifU64; difd = mkDifD64;
3577         and_or_ty = Ity_I64; improve = mkImproveOR64; goto do_And_Or;
3578      case Iop_Or32:
3579         uifu = mkUifU32; difd = mkDifD32;
3580         and_or_ty = Ity_I32; improve = mkImproveOR32; goto do_And_Or;
3581      case Iop_Or16:
3582         uifu = mkUifU16; difd = mkDifD16;
3583         and_or_ty = Ity_I16; improve = mkImproveOR16; goto do_And_Or;
3584      case Iop_Or8:
3585         uifu = mkUifU8; difd = mkDifD8;
3586         and_or_ty = Ity_I8; improve = mkImproveOR8; goto do_And_Or;
3587
3588      do_And_Or:
3589         return
3590         assignNew(
3591            'V', mce,
3592            and_or_ty,
3593            difd(mce, uifu(mce, vatom1, vatom2),
3594                      difd(mce, improve(mce, atom1, vatom1),
3595                                improve(mce, atom2, vatom2) ) ) );
3596
3597      case Iop_Xor8:
3598         return mkUifU8(mce, vatom1, vatom2);
3599      case Iop_Xor16:
3600         return mkUifU16(mce, vatom1, vatom2);
3601      case Iop_Xor32:
3602         return mkUifU32(mce, vatom1, vatom2);
3603      case Iop_Xor64:
3604         return mkUifU64(mce, vatom1, vatom2);
3605      case Iop_XorV128:
3606         return mkUifUV128(mce, vatom1, vatom2);
3607      case Iop_XorV256:
3608         return mkUifUV256(mce, vatom1, vatom2);
3609
3610      default:
3611         ppIROp(op);
3612         VG_(tool_panic)("memcheck:expr2vbits_Binop");
3613   }
3614}
3615
3616
3617static
3618IRExpr* expr2vbits_Unop ( MCEnv* mce, IROp op, IRAtom* atom )
3619{
3620   IRAtom* vatom = expr2vbits( mce, atom );
3621   tl_assert(isOriginalAtom(mce,atom));
3622   switch (op) {
3623
3624      case Iop_Sqrt64Fx2:
3625         return unary64Fx2(mce, vatom);
3626
3627      case Iop_Sqrt64F0x2:
3628         return unary64F0x2(mce, vatom);
3629
3630      case Iop_Sqrt32Fx8:
3631      case Iop_RSqrt32Fx8:
3632      case Iop_Recip32Fx8:
3633         return unary32Fx8(mce, vatom);
3634
3635      case Iop_Sqrt64Fx4:
3636         return unary64Fx4(mce, vatom);
3637
3638      case Iop_Sqrt32Fx4:
3639      case Iop_RSqrt32Fx4:
3640      case Iop_Recip32Fx4:
3641      case Iop_I32UtoFx4:
3642      case Iop_I32StoFx4:
3643      case Iop_QFtoI32Ux4_RZ:
3644      case Iop_QFtoI32Sx4_RZ:
3645      case Iop_RoundF32x4_RM:
3646      case Iop_RoundF32x4_RP:
3647      case Iop_RoundF32x4_RN:
3648      case Iop_RoundF32x4_RZ:
3649      case Iop_Recip32x4:
3650      case Iop_Abs32Fx4:
3651      case Iop_Neg32Fx4:
3652      case Iop_Rsqrte32Fx4:
3653         return unary32Fx4(mce, vatom);
3654
3655      case Iop_I32UtoFx2:
3656      case Iop_I32StoFx2:
3657      case Iop_Recip32Fx2:
3658      case Iop_Recip32x2:
3659      case Iop_Abs32Fx2:
3660      case Iop_Neg32Fx2:
3661      case Iop_Rsqrte32Fx2:
3662         return unary32Fx2(mce, vatom);
3663
3664      case Iop_Sqrt32F0x4:
3665      case Iop_RSqrt32F0x4:
3666      case Iop_Recip32F0x4:
3667         return unary32F0x4(mce, vatom);
3668
3669      case Iop_32UtoV128:
3670      case Iop_64UtoV128:
3671      case Iop_Dup8x16:
3672      case Iop_Dup16x8:
3673      case Iop_Dup32x4:
3674      case Iop_Reverse16_8x16:
3675      case Iop_Reverse32_8x16:
3676      case Iop_Reverse32_16x8:
3677      case Iop_Reverse64_8x16:
3678      case Iop_Reverse64_16x8:
3679      case Iop_Reverse64_32x4:
3680      case Iop_V256toV128_1: case Iop_V256toV128_0:
3681         return assignNew('V', mce, Ity_V128, unop(op, vatom));
3682
3683      case Iop_F128HItoF64:  /* F128 -> high half of F128 */
3684      case Iop_D128HItoD64:  /* D128 -> high half of D128 */
3685         return assignNew('V', mce, Ity_I64, unop(Iop_128HIto64, vatom));
3686      case Iop_F128LOtoF64:  /* F128 -> low  half of F128 */
3687      case Iop_D128LOtoD64:  /* D128 -> low  half of D128 */
3688         return assignNew('V', mce, Ity_I64, unop(Iop_128to64, vatom));
3689
3690      case Iop_NegF128:
3691      case Iop_AbsF128:
3692         return mkPCastTo(mce, Ity_I128, vatom);
3693
3694      case Iop_I32StoF128: /* signed I32 -> F128 */
3695      case Iop_I64StoF128: /* signed I64 -> F128 */
3696      case Iop_I32UtoF128: /* unsigned I32 -> F128 */
3697      case Iop_I64UtoF128: /* unsigned I64 -> F128 */
3698      case Iop_F32toF128:  /* F32 -> F128 */
3699      case Iop_F64toF128:  /* F64 -> F128 */
3700      case Iop_I64StoD128: /* signed I64 -> D128 */
3701         return mkPCastTo(mce, Ity_I128, vatom);
3702
3703      case Iop_F32toF64:
3704      case Iop_I32StoF64:
3705      case Iop_I32UtoF64:
3706      case Iop_NegF64:
3707      case Iop_AbsF64:
3708      case Iop_Est5FRSqrt:
3709      case Iop_RoundF64toF64_NEAREST:
3710      case Iop_RoundF64toF64_NegINF:
3711      case Iop_RoundF64toF64_PosINF:
3712      case Iop_RoundF64toF64_ZERO:
3713      case Iop_Clz64:
3714      case Iop_D32toD64:
3715      case Iop_ExtractExpD64:    /* D64  -> I64 */
3716      case Iop_ExtractExpD128:   /* D128 -> I64 */
3717      case Iop_DPBtoBCD:
3718      case Iop_BCDtoDPB:
3719         return mkPCastTo(mce, Ity_I64, vatom);
3720
3721      case Iop_D64toD128:
3722         return mkPCastTo(mce, Ity_I128, vatom);
3723
3724      case Iop_Clz32:
3725      case Iop_TruncF64asF32:
3726      case Iop_NegF32:
3727      case Iop_AbsF32:
3728         return mkPCastTo(mce, Ity_I32, vatom);
3729
3730      case Iop_Ctz32:
3731      case Iop_Ctz64:
3732         return expensiveCountTrailingZeroes(mce, op, atom, vatom);
3733
3734      case Iop_1Uto64:
3735      case Iop_1Sto64:
3736      case Iop_8Uto64:
3737      case Iop_8Sto64:
3738      case Iop_16Uto64:
3739      case Iop_16Sto64:
3740      case Iop_32Sto64:
3741      case Iop_32Uto64:
3742      case Iop_V128to64:
3743      case Iop_V128HIto64:
3744      case Iop_128HIto64:
3745      case Iop_128to64:
3746      case Iop_Dup8x8:
3747      case Iop_Dup16x4:
3748      case Iop_Dup32x2:
3749      case Iop_Reverse16_8x8:
3750      case Iop_Reverse32_8x8:
3751      case Iop_Reverse32_16x4:
3752      case Iop_Reverse64_8x8:
3753      case Iop_Reverse64_16x4:
3754      case Iop_Reverse64_32x2:
3755      case Iop_V256to64_0: case Iop_V256to64_1:
3756      case Iop_V256to64_2: case Iop_V256to64_3:
3757         return assignNew('V', mce, Ity_I64, unop(op, vatom));
3758
3759      case Iop_64to32:
3760      case Iop_64HIto32:
3761      case Iop_1Uto32:
3762      case Iop_1Sto32:
3763      case Iop_8Uto32:
3764      case Iop_16Uto32:
3765      case Iop_16Sto32:
3766      case Iop_8Sto32:
3767      case Iop_V128to32:
3768         return assignNew('V', mce, Ity_I32, unop(op, vatom));
3769
3770      case Iop_8Sto16:
3771      case Iop_8Uto16:
3772      case Iop_32to16:
3773      case Iop_32HIto16:
3774      case Iop_64to16:
3775      case Iop_GetMSBs8x16:
3776         return assignNew('V', mce, Ity_I16, unop(op, vatom));
3777
3778      case Iop_1Uto8:
3779      case Iop_1Sto8:
3780      case Iop_16to8:
3781      case Iop_16HIto8:
3782      case Iop_32to8:
3783      case Iop_64to8:
3784      case Iop_GetMSBs8x8:
3785         return assignNew('V', mce, Ity_I8, unop(op, vatom));
3786
3787      case Iop_32to1:
3788         return assignNew('V', mce, Ity_I1, unop(Iop_32to1, vatom));
3789
3790      case Iop_64to1:
3791         return assignNew('V', mce, Ity_I1, unop(Iop_64to1, vatom));
3792
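      /* Reinterpretations and bitwise NOTs are exact bit-for-bit copies
         as far as definedness is concerned, so the operand's vbits pass
         through unchanged. */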
3793      case Iop_ReinterpF64asI64:
3794      case Iop_ReinterpI64asF64:
3795      case Iop_ReinterpI32asF32:
3796      case Iop_ReinterpF32asI32:
3797      case Iop_ReinterpI64asD64:
3798      case Iop_ReinterpD64asI64:
3799      case Iop_NotV256:
3800      case Iop_NotV128:
3801      case Iop_Not64:
3802      case Iop_Not32:
3803      case Iop_Not16:
3804      case Iop_Not8:
3805      case Iop_Not1:
3806         return vatom;
3807
3808      case Iop_CmpNEZ8x8:
3809      case Iop_Cnt8x8:
3810      case Iop_Clz8Sx8:
3811      case Iop_Cls8Sx8:
3812      case Iop_Abs8x8:
3813         return mkPCast8x8(mce, vatom);
3814
3815      case Iop_CmpNEZ8x16:
3816      case Iop_Cnt8x16:
3817      case Iop_Clz8Sx16:
3818      case Iop_Cls8Sx16:
3819      case Iop_Abs8x16:
3820         return mkPCast8x16(mce, vatom);
3821
3822      case Iop_CmpNEZ16x4:
3823      case Iop_Clz16Sx4:
3824      case Iop_Cls16Sx4:
3825      case Iop_Abs16x4:
3826         return mkPCast16x4(mce, vatom);
3827
3828      case Iop_CmpNEZ16x8:
3829      case Iop_Clz16Sx8:
3830      case Iop_Cls16Sx8:
3831      case Iop_Abs16x8:
3832         return mkPCast16x8(mce, vatom);
3833
3834      case Iop_CmpNEZ32x2:
3835      case Iop_Clz32Sx2:
3836      case Iop_Cls32Sx2:
3837      case Iop_FtoI32Ux2_RZ:
3838      case Iop_FtoI32Sx2_RZ:
3839      case Iop_Abs32x2:
3840         return mkPCast32x2(mce, vatom);
3841
3842      case Iop_CmpNEZ32x4:
3843      case Iop_Clz32Sx4:
3844      case Iop_Cls32Sx4:
3845      case Iop_FtoI32Ux4_RZ:
3846      case Iop_FtoI32Sx4_RZ:
3847      case Iop_Abs32x4:
3848         return mkPCast32x4(mce, vatom);
3849
3850      case Iop_CmpwNEZ32:
3851         return mkPCastTo(mce, Ity_I32, vatom);
3852
3853      case Iop_CmpwNEZ64:
3854         return mkPCastTo(mce, Ity_I64, vatom);
3855
3856      case Iop_CmpNEZ64x2:
3857         return mkPCast64x2(mce, vatom);
3858
3859      case Iop_NarrowUn16to8x8:
3860      case Iop_NarrowUn32to16x4:
3861      case Iop_NarrowUn64to32x2:
3862      case Iop_QNarrowUn16Sto8Sx8:
3863      case Iop_QNarrowUn16Sto8Ux8:
3864      case Iop_QNarrowUn16Uto8Ux8:
3865      case Iop_QNarrowUn32Sto16Sx4:
3866      case Iop_QNarrowUn32Sto16Ux4:
3867      case Iop_QNarrowUn32Uto16Ux4:
3868      case Iop_QNarrowUn64Sto32Sx2:
3869      case Iop_QNarrowUn64Sto32Ux2:
3870      case Iop_QNarrowUn64Uto32Ux2:
3871         return vectorNarrowUnV128(mce, op, vatom);
3872
3873      case Iop_Widen8Sto16x8:
3874      case Iop_Widen8Uto16x8:
3875      case Iop_Widen16Sto32x4:
3876      case Iop_Widen16Uto32x4:
3877      case Iop_Widen32Sto64x2:
3878      case Iop_Widen32Uto64x2:
3879         return vectorWidenI64(mce, op, vatom);
3880
3881      case Iop_PwAddL32Ux2:
3882      case Iop_PwAddL32Sx2:
3883         return mkPCastTo(mce, Ity_I64,
3884               assignNew('V', mce, Ity_I64, unop(op, mkPCast32x2(mce, vatom))));
3885
3886      case Iop_PwAddL16Ux4:
3887      case Iop_PwAddL16Sx4:
3888         return mkPCast32x2(mce,
3889               assignNew('V', mce, Ity_I64, unop(op, mkPCast16x4(mce, vatom))));
3890
3891      case Iop_PwAddL8Ux8:
3892      case Iop_PwAddL8Sx8:
3893         return mkPCast16x4(mce,
3894               assignNew('V', mce, Ity_I64, unop(op, mkPCast8x8(mce, vatom))));
3895
3896      case Iop_PwAddL32Ux4:
3897      case Iop_PwAddL32Sx4:
3898         return mkPCast64x2(mce,
3899               assignNew('V', mce, Ity_V128, unop(op, mkPCast32x4(mce, vatom))));
3900
3901      case Iop_PwAddL16Ux8:
3902      case Iop_PwAddL16Sx8:
3903         return mkPCast32x4(mce,
3904               assignNew('V', mce, Ity_V128, unop(op, mkPCast16x8(mce, vatom))));
3905
3906      case Iop_PwAddL8Ux16:
3907      case Iop_PwAddL8Sx16:
3908         return mkPCast16x8(mce,
3909               assignNew('V', mce, Ity_V128, unop(op, mkPCast8x16(mce, vatom))));
3910
3911      case Iop_I64UtoF32:
3912      default:
3913         ppIROp(op);
3914         VG_(tool_panic)("memcheck:expr2vbits_Unop");
3915   }
3916}
3917
3918
3919/* Worker function; do not call directly. */
3920static
3921IRAtom* expr2vbits_Load_WRK ( MCEnv* mce,
3922                              IREndness end, IRType ty,
3923                              IRAtom* addr, UInt bias )
3924{
3925   void*    helper;
3926   const HChar* hname;
3927   IRDirty* di;
3928   IRTemp   datavbits;
3929   IRAtom*  addrAct;
3930
3931   tl_assert(isOriginalAtom(mce,addr));
3932   tl_assert(end == Iend_LE || end == Iend_BE);
3933
3934   /* First, emit a definedness test for the address.  This also sets
3935      the address (shadow) to 'defined' following the test. */
3936   complainIfUndefined( mce, addr, NULL );
3937
3938   /* Now cook up a call to the relevant helper function, to read the
3939      data V bits from shadow memory. */
3940   ty = shadowTypeV(ty);
3941
3942   if (end == Iend_LE) {
3943      switch (ty) {
3944         case Ity_I64: helper = &MC_(helperc_LOADV64le);
3945                       hname = "MC_(helperc_LOADV64le)";
3946                       break;
3947         case Ity_I32: helper = &MC_(helperc_LOADV32le);
3948                       hname = "MC_(helperc_LOADV32le)";
3949                       break;
3950         case Ity_I16: helper = &MC_(helperc_LOADV16le);
3951                       hname = "MC_(helperc_LOADV16le)";
3952                       break;
3953         case Ity_I8:  helper = &MC_(helperc_LOADV8);
3954                       hname = "MC_(helperc_LOADV8)";
3955                       break;
3956         default:      ppIRType(ty);
3957                       VG_(tool_panic)("memcheck:do_shadow_Load(LE)");
3958      }
3959   } else {
3960      switch (ty) {
3961         case Ity_I64: helper = &MC_(helperc_LOADV64be);
3962                       hname = "MC_(helperc_LOADV64be)";
3963                       break;
3964         case Ity_I32: helper = &MC_(helperc_LOADV32be);
3965                       hname = "MC_(helperc_LOADV32be)";
3966                       break;
3967         case Ity_I16: helper = &MC_(helperc_LOADV16be);
3968                       hname = "MC_(helperc_LOADV16be)";
3969                       break;
3970         case Ity_I8:  helper = &MC_(helperc_LOADV8);
3971                       hname = "MC_(helperc_LOADV8)";
3972                       break;
3973         default:      ppIRType(ty);
3974                       VG_(tool_panic)("memcheck:do_shadow_Load(BE)");
3975      }
3976   }
3977
3978   /* Generate the actual address into addrAct. */
3979   if (bias == 0) {
3980      addrAct = addr;
3981   } else {
3982      IROp    mkAdd;
3983      IRAtom* eBias;
3984      IRType  tyAddr  = mce->hWordTy;
3985      tl_assert( tyAddr == Ity_I32 || tyAddr == Ity_I64 );
3986      mkAdd   = tyAddr==Ity_I32 ? Iop_Add32 : Iop_Add64;
3987      eBias   = tyAddr==Ity_I32 ? mkU32(bias) : mkU64(bias);
3988      addrAct = assignNew('V', mce, tyAddr, binop(mkAdd, addr, eBias) );
3989   }
3990
3991   /* We need to have a place to park the V bits we're just about to
3992      read. */
3993   datavbits = newTemp(mce, ty, VSh);
3994   di = unsafeIRDirty_1_N( datavbits,
3995                           1/*regparms*/,
3996                           hname, VG_(fnptr_to_fnentry)( helper ),
3997                           mkIRExprVec_1( addrAct ));
3998   setHelperAnns( mce, di );
3999   stmt( 'V', mce, IRStmt_Dirty(di) );
4000
4001   return mkexpr(datavbits);
4002}
4003
4004
4005static
4006IRAtom* expr2vbits_Load ( MCEnv* mce,
4007                          IREndness end, IRType ty,
4008                          IRAtom* addr, UInt bias )
4009{
4010   tl_assert(end == Iend_LE || end == Iend_BE);
4011   switch (shadowTypeV(ty)) {
4012      case Ity_I8:
4013      case Ity_I16:
4014      case Ity_I32:
4015      case Ity_I64:
4016         return expr2vbits_Load_WRK(mce, end, ty, addr, bias);
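      /* Wider loads are synthesised from 64-bit shadow loads, since the
         LOADV helpers only go up to 64 bits; note the lo/hi swap for
         big-endian targets in the V128 case. */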
4017      case Ity_V128: {
4018         IRAtom *v64hi, *v64lo;
4019         if (end == Iend_LE) {
4020            v64lo = expr2vbits_Load_WRK(mce, end, Ity_I64, addr, bias+0);
4021            v64hi = expr2vbits_Load_WRK(mce, end, Ity_I64, addr, bias+8);
4022         } else {
4023            v64hi = expr2vbits_Load_WRK(mce, end, Ity_I64, addr, bias+0);
4024            v64lo = expr2vbits_Load_WRK(mce, end, Ity_I64, addr, bias+8);
4025         }
4026         return assignNew( 'V', mce,
4027                           Ity_V128,
4028                           binop(Iop_64HLtoV128, v64hi, v64lo));
4029      }
4030      case Ity_V256: {
4031         /* V256-bit case -- phrased in terms of 64 bit units (Qs),
4032            with Q3 being the most significant lane. */
4033         if (end == Iend_BE) goto unhandled;
4034         IRAtom* v64Q0 = expr2vbits_Load_WRK(mce, end, Ity_I64, addr, bias+0);
4035         IRAtom* v64Q1 = expr2vbits_Load_WRK(mce, end, Ity_I64, addr, bias+8);
4036         IRAtom* v64Q2 = expr2vbits_Load_WRK(mce, end, Ity_I64, addr, bias+16);
4037         IRAtom* v64Q3 = expr2vbits_Load_WRK(mce, end, Ity_I64, addr, bias+24);
4038         return assignNew( 'V', mce,
4039                           Ity_V256,
4040                           IRExpr_Qop(Iop_64x4toV256,
4041                                      v64Q3, v64Q2, v64Q1, v64Q0));
4042      }
4043      unhandled:
4044      default:
4045         VG_(tool_panic)("expr2vbits_Load");
4046   }
4047}
4048
4049
4050/* If there is no guard expression, or the guard is always TRUE, this
4051   function behaves like expr2vbits_Load.  If the guard is not true at
4052   runtime, an all-bits-defined bit pattern is returned instead.
4053   It is assumed that the definedness of GUARD has already been checked
4054   at the call site. */
4055static
4056IRAtom* expr2vbits_guarded_Load ( MCEnv* mce,
4057                                  IREndness end, IRType ty,
4058                                  IRAtom* addr, UInt bias, IRAtom *guard )
4059{
4060   if (guard) {
4061      IRAtom *cond, *iffalse, *iftrue;
4062
4063      cond    = assignNew('V', mce, Ity_I8, unop(Iop_1Uto8, guard));
4064      iftrue  = assignNew('V', mce, ty,
4065                          expr2vbits_Load(mce, end, ty, addr, bias));
4066      iffalse = assignNew('V', mce, ty, definedOfType(ty));
4067
4068      return assignNew('V', mce, ty, IRExpr_Mux0X(cond, iffalse, iftrue));
4069   }
4070
4071   /* No guard expression or unconditional load */
4072   return expr2vbits_Load(mce, end, ty, addr, bias);
4073}
4074
4075
4076static
4077IRAtom* expr2vbits_Mux0X ( MCEnv* mce,
4078                           IRAtom* cond, IRAtom* expr0, IRAtom* exprX )
4079{
4080   IRAtom *vbitsC, *vbits0, *vbitsX;
4081   IRType ty;
4082   /* Given Mux0X(cond,expr0,exprX), generate
4083         Mux0X(cond,expr0#,exprX#) `UifU` PCast(cond#)
4084      That is, steer the V bits like the originals, but trash the
4085      result if the steering value is undefined.  This gives
4086      lazy propagation. */
4087   tl_assert(isOriginalAtom(mce, cond));
4088   tl_assert(isOriginalAtom(mce, expr0));
4089   tl_assert(isOriginalAtom(mce, exprX));
4090
4091   vbitsC = expr2vbits(mce, cond);
4092   vbits0 = expr2vbits(mce, expr0);
4093   vbitsX = expr2vbits(mce, exprX);
4094   ty = typeOfIRExpr(mce->sb->tyenv, vbits0);
4095
4096   return
4097      mkUifU(mce, ty, assignNew('V', mce, ty,
4098                                     IRExpr_Mux0X(cond, vbits0, vbitsX)),
4099                      mkPCastTo(mce, ty, vbitsC) );
4100}
4101
4102/* --------- This is the main expression-handling function. --------- */
4103
4104static
4105IRExpr* expr2vbits ( MCEnv* mce, IRExpr* e )
4106{
4107   switch (e->tag) {
4108
4109      case Iex_Get:
4110         return shadow_GET( mce, e->Iex.Get.offset, e->Iex.Get.ty );
4111
4112      case Iex_GetI:
4113         return shadow_GETI( mce, e->Iex.GetI.descr,
4114                                  e->Iex.GetI.ix, e->Iex.GetI.bias );
4115
4116      case Iex_RdTmp:
4117         return IRExpr_RdTmp( findShadowTmpV(mce, e->Iex.RdTmp.tmp) );
4118
4119      case Iex_Const:
4120         return definedOfType(shadowTypeV(typeOfIRExpr(mce->sb->tyenv, e)));
4121
4122      case Iex_Qop:
4123         return expr2vbits_Qop(
4124                   mce,
4125                   e->Iex.Qop.details->op,
4126                   e->Iex.Qop.details->arg1, e->Iex.Qop.details->arg2,
4127                   e->Iex.Qop.details->arg3, e->Iex.Qop.details->arg4
4128                );
4129
4130      case Iex_Triop:
4131         return expr2vbits_Triop(
4132                   mce,
4133                   e->Iex.Triop.details->op,
4134                   e->Iex.Triop.details->arg1, e->Iex.Triop.details->arg2,
4135                   e->Iex.Triop.details->arg3
4136                );
4137
4138      case Iex_Binop:
4139         return expr2vbits_Binop(
4140                   mce,
4141                   e->Iex.Binop.op,
4142                   e->Iex.Binop.arg1, e->Iex.Binop.arg2
4143                );
4144
4145      case Iex_Unop:
4146         return expr2vbits_Unop( mce, e->Iex.Unop.op, e->Iex.Unop.arg );
4147
4148      case Iex_Load:
4149         return expr2vbits_Load( mce, e->Iex.Load.end,
4150                                      e->Iex.Load.ty,
4151                                      e->Iex.Load.addr, 0/*addr bias*/ );
4152
4153      case Iex_CCall:
4154         return mkLazyN( mce, e->Iex.CCall.args,
4155                              e->Iex.CCall.retty,
4156                              e->Iex.CCall.cee );
4157
4158      case Iex_Mux0X:
4159         return expr2vbits_Mux0X( mce, e->Iex.Mux0X.cond, e->Iex.Mux0X.expr0,
4160                                       e->Iex.Mux0X.exprX);
4161
4162      default:
4163         VG_(printf)("\n");
4164         ppIRExpr(e);
4165         VG_(printf)("\n");
4166         VG_(tool_panic)("memcheck: expr2vbits");
4167   }
4168}
4169
4170/*------------------------------------------------------------*/
4171/*--- Generate shadow stmts from all kinds of IRStmts.     ---*/
4172/*------------------------------------------------------------*/
4173
4174/* Widen a value to the host word size. */
4175
4176static
4177IRExpr* zwidenToHostWord ( MCEnv* mce, IRAtom* vatom )
4178{
4179   IRType ty, tyH;
4180
4181   /* vatom is a vbits-value and as such can only have a shadow type. */
4182   tl_assert(isShadowAtom(mce,vatom));
4183
4184   ty  = typeOfIRExpr(mce->sb->tyenv, vatom);
4185   tyH = mce->hWordTy;
4186
4187   if (tyH == Ity_I32) {
4188      switch (ty) {
4189         case Ity_I32:
4190            return vatom;
4191         case Ity_I16:
4192            return assignNew('V', mce, tyH, unop(Iop_16Uto32, vatom));
4193         case Ity_I8:
4194            return assignNew('V', mce, tyH, unop(Iop_8Uto32, vatom));
4195         default:
4196            goto unhandled;
4197      }
4198   } else
4199   if (tyH == Ity_I64) {
4200      switch (ty) {
4201         case Ity_I32:
4202            return assignNew('V', mce, tyH, unop(Iop_32Uto64, vatom));
4203         case Ity_I16:
4204            return assignNew('V', mce, tyH, unop(Iop_32Uto64,
4205                   assignNew('V', mce, Ity_I32, unop(Iop_16Uto32, vatom))));
4206         case Ity_I8:
4207            return assignNew('V', mce, tyH, unop(Iop_32Uto64,
4208                   assignNew('V', mce, Ity_I32, unop(Iop_8Uto32, vatom))));
4209         default:
4210            goto unhandled;
4211      }
4212   } else {
4213      goto unhandled;
4214   }
4215  unhandled:
4216   VG_(printf)("\nty = "); ppIRType(ty); VG_(printf)("\n");
4217   VG_(tool_panic)("zwidenToHostWord");
4218}
4219
4220
4221/* Generate a shadow store.  addr is always the original address atom.
4222   You can pass in either originals or V-bits for the data atom, but
4223   obviously not both.  guard :: Ity_I1 controls whether the store
4224   really happens; NULL means it unconditionally does.  Note that
4225   guard itself is not checked for definedness; the caller of this
4226   function must do that if necessary. */
4227
4228static
4229void do_shadow_Store ( MCEnv* mce,
4230                       IREndness end,
4231                       IRAtom* addr, UInt bias,
4232                       IRAtom* data, IRAtom* vdata,
4233                       IRAtom* guard )
4234{
4235   IROp     mkAdd;
4236   IRType   ty, tyAddr;
4237   void*    helper = NULL;
4238   const HChar* hname = NULL;
4239   IRConst* c;
4240
4241   tyAddr = mce->hWordTy;
4242   mkAdd  = tyAddr==Ity_I32 ? Iop_Add32 : Iop_Add64;
4243   tl_assert( tyAddr == Ity_I32 || tyAddr == Ity_I64 );
4244   tl_assert( end == Iend_LE || end == Iend_BE );
4245
4246   if (data) {
4247      tl_assert(!vdata);
4248      tl_assert(isOriginalAtom(mce, data));
4249      tl_assert(bias == 0);
4250      vdata = expr2vbits( mce, data );
4251   } else {
4252      tl_assert(vdata);
4253   }
4254
4255   tl_assert(isOriginalAtom(mce,addr));
4256   tl_assert(isShadowAtom(mce,vdata));
4257
4258   if (guard) {
4259      tl_assert(isOriginalAtom(mce, guard));
4260      tl_assert(typeOfIRExpr(mce->sb->tyenv, guard) == Ity_I1);
4261   }
4262
4263   ty = typeOfIRExpr(mce->sb->tyenv, vdata);
4264
4265   // If we're not doing undefined value checking, pretend that this value
4266   // is "all valid".  That lets Vex's optimiser remove some of the V bit
4267   // shadow computation ops that precede it.
4268   if (MC_(clo_mc_level) == 1) {
4269      switch (ty) {
4270         case Ity_V256: // V256 weirdness -- used four times
4271                        c = IRConst_V256(V_BITS32_DEFINED); break;
4272         case Ity_V128: // V128 weirdness -- used twice
4273                        c = IRConst_V128(V_BITS16_DEFINED); break;
4274         case Ity_I64:  c = IRConst_U64 (V_BITS64_DEFINED); break;
4275         case Ity_I32:  c = IRConst_U32 (V_BITS32_DEFINED); break;
4276         case Ity_I16:  c = IRConst_U16 (V_BITS16_DEFINED); break;
4277         case Ity_I8:   c = IRConst_U8  (V_BITS8_DEFINED);  break;
4278         default:       VG_(tool_panic)("memcheck:do_shadow_Store(LE)");
4279      }
4280      vdata = IRExpr_Const( c );
4281   }
4282
4283   /* First, emit a definedness test for the address.  This also sets
4284      the address (shadow) to 'defined' following the test. */
4285   complainIfUndefined( mce, addr, guard );
4286
4287   /* Now decide which helper function to call to write the data V
4288      bits into shadow memory. */
4289   if (end == Iend_LE) {
4290      switch (ty) {
4291         case Ity_V256: /* we'll use the helper four times */
4292         case Ity_V128: /* we'll use the helper twice */
4293         case Ity_I64: helper = &MC_(helperc_STOREV64le);
4294                       hname = "MC_(helperc_STOREV64le)";
4295                       break;
4296         case Ity_I32: helper = &MC_(helperc_STOREV32le);
4297                       hname = "MC_(helperc_STOREV32le)";
4298                       break;
4299         case Ity_I16: helper = &MC_(helperc_STOREV16le);
4300                       hname = "MC_(helperc_STOREV16le)";
4301                       break;
4302         case Ity_I8:  helper = &MC_(helperc_STOREV8);
4303                       hname = "MC_(helperc_STOREV8)";
4304                       break;
4305         default:      VG_(tool_panic)("memcheck:do_shadow_Store(LE)");
4306      }
4307   } else {
4308      switch (ty) {
4309         case Ity_V128: /* we'll use the helper twice */
4310         case Ity_I64: helper = &MC_(helperc_STOREV64be);
4311                       hname = "MC_(helperc_STOREV64be)";
4312                       break;
4313         case Ity_I32: helper = &MC_(helperc_STOREV32be);
4314                       hname = "MC_(helperc_STOREV32be)";
4315                       break;
4316         case Ity_I16: helper = &MC_(helperc_STOREV16be);
4317                       hname = "MC_(helperc_STOREV16be)";
4318                       break;
4319         case Ity_I8:  helper = &MC_(helperc_STOREV8);
4320                       hname = "MC_(helperc_STOREV8)";
4321                       break;
4322         /* Note: no V256 case here, because no big-endian target that
4323            we support has 256-bit vectors. */
4324         default:      VG_(tool_panic)("memcheck:do_shadow_Store(BE)");
4325      }
4326   }
4327
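   /* Wide stores are split into 64-bit pieces here, since the STOREV
      helpers only go up to 64 bits.  Each piece gets its own dirty
      helper call, all sharing the same guard (if any). */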
4328   if (UNLIKELY(ty == Ity_V256)) {
4329
4330      /* V256-bit case -- phrased in terms of 64 bit units (Qs), with
4331         Q3 being the most significant lane. */
4332      /* These are the offsets of the Qs in memory. */
4333      Int     offQ0, offQ1, offQ2, offQ3;
4334
4335      /* Various bits for constructing the 4 lane helper calls */
4336      IRDirty *diQ0,    *diQ1,    *diQ2,    *diQ3;
4337      IRAtom  *addrQ0,  *addrQ1,  *addrQ2,  *addrQ3;
4338      IRAtom  *vdataQ0, *vdataQ1, *vdataQ2, *vdataQ3;
4339      IRAtom  *eBiasQ0, *eBiasQ1, *eBiasQ2, *eBiasQ3;
4340
4341      if (end == Iend_LE) {
4342         offQ0 = 0; offQ1 = 8; offQ2 = 16; offQ3 = 24;
4343      } else {
4344         offQ3 = 0; offQ2 = 8; offQ1 = 16; offQ0 = 24;
4345      }
4346
4347      eBiasQ0 = tyAddr==Ity_I32 ? mkU32(bias+offQ0) : mkU64(bias+offQ0);
4348      addrQ0  = assignNew('V', mce, tyAddr, binop(mkAdd, addr, eBiasQ0) );
4349      vdataQ0 = assignNew('V', mce, Ity_I64, unop(Iop_V256to64_0, vdata));
4350      diQ0    = unsafeIRDirty_0_N(
4351                   1/*regparms*/,
4352                   hname, VG_(fnptr_to_fnentry)( helper ),
4353                   mkIRExprVec_2( addrQ0, vdataQ0 )
4354                );
4355
4356      eBiasQ1 = tyAddr==Ity_I32 ? mkU32(bias+offQ1) : mkU64(bias+offQ1);
4357      addrQ1  = assignNew('V', mce, tyAddr, binop(mkAdd, addr, eBiasQ1) );
4358      vdataQ1 = assignNew('V', mce, Ity_I64, unop(Iop_V256to64_1, vdata));
4359      diQ1    = unsafeIRDirty_0_N(
4360                   1/*regparms*/,
4361                   hname, VG_(fnptr_to_fnentry)( helper ),
4362                   mkIRExprVec_2( addrQ1, vdataQ1 )
4363                );
4364
4365      eBiasQ2 = tyAddr==Ity_I32 ? mkU32(bias+offQ2) : mkU64(bias+offQ2);
4366      addrQ2  = assignNew('V', mce, tyAddr, binop(mkAdd, addr, eBiasQ2) );
4367      vdataQ2 = assignNew('V', mce, Ity_I64, unop(Iop_V256to64_2, vdata));
4368      diQ2    = unsafeIRDirty_0_N(
4369                   1/*regparms*/,
4370                   hname, VG_(fnptr_to_fnentry)( helper ),
4371                   mkIRExprVec_2( addrQ2, vdataQ2 )
4372                );
4373
4374      eBiasQ3 = tyAddr==Ity_I32 ? mkU32(bias+offQ3) : mkU64(bias+offQ3);
4375      addrQ3  = assignNew('V', mce, tyAddr, binop(mkAdd, addr, eBiasQ3) );
4376      vdataQ3 = assignNew('V', mce, Ity_I64, unop(Iop_V256to64_3, vdata));
4377      diQ3    = unsafeIRDirty_0_N(
4378                   1/*regparms*/,
4379                   hname, VG_(fnptr_to_fnentry)( helper ),
4380                   mkIRExprVec_2( addrQ3, vdataQ3 )
4381                );
4382
4383      if (guard)
4384         diQ0->guard = diQ1->guard = diQ2->guard = diQ3->guard = guard;
4385
4386      setHelperAnns( mce, diQ0 );
4387      setHelperAnns( mce, diQ1 );
4388      setHelperAnns( mce, diQ2 );
4389      setHelperAnns( mce, diQ3 );
4390      stmt( 'V', mce, IRStmt_Dirty(diQ0) );
4391      stmt( 'V', mce, IRStmt_Dirty(diQ1) );
4392      stmt( 'V', mce, IRStmt_Dirty(diQ2) );
4393      stmt( 'V', mce, IRStmt_Dirty(diQ3) );
4394
4395   }
4396   else if (UNLIKELY(ty == Ity_V128)) {
4397
4398      /* V128-bit case */
4399      /* See comment in next clause re 64-bit regparms */
4400      /* also, need to be careful about endianness */
4401
4402      Int     offLo64, offHi64;
4403      IRDirty *diLo64, *diHi64;
4404      IRAtom  *addrLo64, *addrHi64;
4405      IRAtom  *vdataLo64, *vdataHi64;
4406      IRAtom  *eBiasLo64, *eBiasHi64;
4407
4408      if (end == Iend_LE) {
4409         offLo64 = 0;
4410         offHi64 = 8;
4411      } else {
4412         offLo64 = 8;
4413         offHi64 = 0;
4414      }
4415
4416      eBiasLo64 = tyAddr==Ity_I32 ? mkU32(bias+offLo64) : mkU64(bias+offLo64);
4417      addrLo64  = assignNew('V', mce, tyAddr, binop(mkAdd, addr, eBiasLo64) );
4418      vdataLo64 = assignNew('V', mce, Ity_I64, unop(Iop_V128to64, vdata));
4419      diLo64    = unsafeIRDirty_0_N(
4420                     1/*regparms*/,
4421                     hname, VG_(fnptr_to_fnentry)( helper ),
4422                     mkIRExprVec_2( addrLo64, vdataLo64 )
4423                  );
4424      eBiasHi64 = tyAddr==Ity_I32 ? mkU32(bias+offHi64) : mkU64(bias+offHi64);
4425      addrHi64  = assignNew('V', mce, tyAddr, binop(mkAdd, addr, eBiasHi64) );
4426      vdataHi64 = assignNew('V', mce, Ity_I64, unop(Iop_V128HIto64, vdata));
4427      diHi64    = unsafeIRDirty_0_N(
4428                     1/*regparms*/,
4429                     hname, VG_(fnptr_to_fnentry)( helper ),
4430                     mkIRExprVec_2( addrHi64, vdataHi64 )
4431                  );
4432      if (guard) diLo64->guard = guard;
4433      if (guard) diHi64->guard = guard;
4434      setHelperAnns( mce, diLo64 );
4435      setHelperAnns( mce, diHi64 );
4436      stmt( 'V', mce, IRStmt_Dirty(diLo64) );
4437      stmt( 'V', mce, IRStmt_Dirty(diHi64) );
4438
4439   } else {
4440
4441      IRDirty *di;
4442      IRAtom  *addrAct;
4443
4444      /* 8/16/32/64-bit cases */
4445      /* Generate the actual address into addrAct. */
4446      if (bias == 0) {
4447         addrAct = addr;
4448      } else {
4449         IRAtom* eBias   = tyAddr==Ity_I32 ? mkU32(bias) : mkU64(bias);
4450         addrAct = assignNew('V', mce, tyAddr, binop(mkAdd, addr, eBias));
4451      }
4452
4453      if (ty == Ity_I64) {
4454         /* We can't do this with regparm 2 on 32-bit platforms, since
4455            the back ends aren't clever enough to handle 64-bit
4456            regparm args.  Therefore be different. */
4457         di = unsafeIRDirty_0_N(
4458                 1/*regparms*/,
4459                 hname, VG_(fnptr_to_fnentry)( helper ),
4460                 mkIRExprVec_2( addrAct, vdata )
4461              );
4462      } else {
4463         di = unsafeIRDirty_0_N(
4464                 2/*regparms*/,
4465                 hname, VG_(fnptr_to_fnentry)( helper ),
4466                 mkIRExprVec_2( addrAct,
4467                                zwidenToHostWord( mce, vdata ))
4468              );
4469      }
4470      if (guard) di->guard = guard;
4471      setHelperAnns( mce, di );
4472      stmt( 'V', mce, IRStmt_Dirty(di) );
4473   }
4474
4475}
4476
4477
4478/* Do lazy pessimistic propagation through a dirty helper call, by
4479   looking at the annotations on it.  This is the most complex part of
4480   Memcheck. */
4481
4482static IRType szToITy ( Int n )
4483{
4484   switch (n) {
4485      case 1: return Ity_I8;
4486      case 2: return Ity_I16;
4487      case 4: return Ity_I32;
4488      case 8: return Ity_I64;
4489      default: VG_(tool_panic)("szToITy(memcheck)");
4490   }
4491}
4492
4493static
4494void do_shadow_Dirty ( MCEnv* mce, IRDirty* d )
4495{
4496   Int       i, k, n, toDo, gSz, gOff;
4497   IRAtom    *src, *here, *curr;
4498   IRType    tySrc, tyDst;
4499   IRTemp    dst;
4500   IREndness end;
4501
4502   /* What's the native endianness?  We need to know this. */
4503#  if defined(VG_BIGENDIAN)
4504   end = Iend_BE;
4505#  elif defined(VG_LITTLEENDIAN)
4506   end = Iend_LE;
4507#  else
4508#    error "Unknown endianness"
4509#  endif
4510
4511   /* First check the guard. */
4512   complainIfUndefined(mce, d->guard, NULL);
4513
4514   /* Now round up all inputs and PCast over them. */
4515   curr = definedOfType(Ity_I32);
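   /* 'curr' accumulates a single 32-bit summary of the definedness of
      every input (unmasked args, read guest state, read memory): each
      input is PCasted to I32 and UifUed in.  At the end it is PCasted
      back out to every output (the destination temp, written guest
      state, written memory), so one undefined input bit taints all
      outputs -- maximally pessimistic, but simple. */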
4516
4517   /* Inputs: unmasked args
4518      Note: arguments are evaluated REGARDLESS of the guard expression */
4519   for (i = 0; d->args[i]; i++) {
4520      if (d->cee->mcx_mask & (1<<i)) {
4521         /* ignore this arg */
4522      } else {
4523         here = mkPCastTo( mce, Ity_I32, expr2vbits(mce, d->args[i]) );
4524         curr = mkUifU32(mce, here, curr);
4525      }
4526   }
4527
4528   /* Inputs: guest state that we read. */
4529   for (i = 0; i < d->nFxState; i++) {
4530      tl_assert(d->fxState[i].fx != Ifx_None);
4531      if (d->fxState[i].fx == Ifx_Write)
4532         continue;
4533
4534      /* Enumerate the described state segments */
4535      for (k = 0; k < 1 + d->fxState[i].nRepeats; k++) {
4536         gOff = d->fxState[i].offset + k * d->fxState[i].repeatLen;
4537         gSz  = d->fxState[i].size;
4538
4539         /* Ignore any sections marked as 'always defined'. */
4540         if (isAlwaysDefd(mce, gOff, gSz)) {
4541            if (0)
4542            VG_(printf)("memcheck: Dirty gst: ignored off %d, sz %d\n",
4543                        gOff, gSz);
4544            continue;
4545         }
4546
4547         /* This state element is read or modified.  So we need to
4548            consider it.  If larger than 8 bytes, deal with it in
4549            8-byte chunks. */
4550         while (True) {
4551            tl_assert(gSz >= 0);
4552            if (gSz == 0) break;
4553            n = gSz <= 8 ? gSz : 8;
4554            /* update 'curr' with UifU of the state slice
4555               gOff .. gOff+n-1 */
4556            tySrc = szToITy( n );
4557
4558            /* Observe the guard expression. If it is false use an
4559               all-bits-defined bit pattern */
4560            IRAtom *cond, *iffalse, *iftrue;
4561
4562            cond    = assignNew('V', mce, Ity_I8, unop(Iop_1Uto8, d->guard));
4563            iftrue  = assignNew('V', mce, tySrc, shadow_GET(mce, gOff, tySrc));
4564            iffalse = assignNew('V', mce, tySrc, definedOfType(tySrc));
4565            src     = assignNew('V', mce, tySrc,
4566                                IRExpr_Mux0X(cond, iffalse, iftrue));
4567
4568            here = mkPCastTo( mce, Ity_I32, src );
4569            curr = mkUifU32(mce, here, curr);
4570            gSz -= n;
4571            gOff += n;
4572         }
4573      }
4574   }
4575
4576   /* Inputs: memory.  First set up some info needed regardless of
4577      whether we're doing reads or writes. */
4578
4579   if (d->mFx != Ifx_None) {
4580      /* Because we may do multiple shadow loads/stores from the same
4581         base address, it's best to do a single test of its
4582         definedness right now.  Post-instrumentation optimisation
4583         should remove all but this test. */
4584      IRType tyAddr;
4585      tl_assert(d->mAddr);
4586      complainIfUndefined(mce, d->mAddr, d->guard);
4587
4588      tyAddr = typeOfIRExpr(mce->sb->tyenv, d->mAddr);
4589      tl_assert(tyAddr == Ity_I32 || tyAddr == Ity_I64);
4590      tl_assert(tyAddr == mce->hWordTy); /* not really right */
4591   }
4592
4593   /* Deal with memory inputs (reads or modifies) */
4594   if (d->mFx == Ifx_Read || d->mFx == Ifx_Modify) {
4595      toDo   = d->mSize;
4596      /* chew off 32-bit chunks.  We don't care about the endianness
4597         since it's all going to be condensed down to a single bit,
4598         but nevertheless choose an endianness which is hopefully
4599         native to the platform. */
4600      while (toDo >= 4) {
4601         here = mkPCastTo(
4602                   mce, Ity_I32,
4603                   expr2vbits_guarded_Load ( mce, end, Ity_I32, d->mAddr,
4604                                             d->mSize - toDo, d->guard )
4605                );
4606         curr = mkUifU32(mce, here, curr);
4607         toDo -= 4;
4608      }
4609      /* chew off 16-bit chunks */
4610      while (toDo >= 2) {
4611         here = mkPCastTo(
4612                   mce, Ity_I32,
4613                   expr2vbits_guarded_Load ( mce, end, Ity_I16, d->mAddr,
4614                                             d->mSize - toDo, d->guard )
4615                );
4616         curr = mkUifU32(mce, here, curr);
4617         toDo -= 2;
4618      }
4619      /* chew off the remaining 8-bit chunk, if any */
4620      if (toDo == 1) {
4621         here = mkPCastTo(
4622                   mce, Ity_I32,
4623                   expr2vbits_guarded_Load ( mce, end, Ity_I8, d->mAddr,
4624                                             d->mSize - toDo, d->guard )
4625                );
4626         curr = mkUifU32(mce, here, curr);
4627         toDo -= 1;
4628      }
4629      tl_assert(toDo == 0);
4630   }
4631
4632   /* Whew!  So curr is a 32-bit V-value summarising pessimistically
4633      all the inputs to the helper.  Now we need to re-distribute the
4634      results to all destinations. */
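       /* Each output simply receives a PCast of 'curr': if any checked
          input bit was undefined, every bit of every output is marked
          undefined; otherwise everything is marked defined. */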
4635
4636   /* Outputs: the destination temporary, if there is one. */
4637   if (d->tmp != IRTemp_INVALID) {
4638      dst   = findShadowTmpV(mce, d->tmp);
4639      tyDst = typeOfIRTemp(mce->sb->tyenv, d->tmp);
4640      assign( 'V', mce, dst, mkPCastTo( mce, tyDst, curr) );
4641   }
4642
4643   /* Outputs: guest state that we write or modify. */
4644   for (i = 0; i < d->nFxState; i++) {
4645      tl_assert(d->fxState[i].fx != Ifx_None);
4646      if (d->fxState[i].fx == Ifx_Read)
4647         continue;
4648
4649      /* Enumerate the described state segments */
4650      for (k = 0; k < 1 + d->fxState[i].nRepeats; k++) {
4651         gOff = d->fxState[i].offset + k * d->fxState[i].repeatLen;
4652         gSz  = d->fxState[i].size;
4653
4654         /* Ignore any sections marked as 'always defined'. */
4655         if (isAlwaysDefd(mce, gOff, gSz))
4656            continue;
4657
4658         /* This state element is written or modified.  So we need to
4659            consider it.  If larger than 8 bytes, deal with it in
4660            8-byte chunks. */
4661         while (True) {
4662            tl_assert(gSz >= 0);
4663            if (gSz == 0) break;
4664            n = gSz <= 8 ? gSz : 8;
4665            /* Write suitably-casted 'curr' to the state slice
4666               gOff .. gOff+n-1 */
4667            tyDst = szToITy( n );
4668            do_shadow_PUT( mce, gOff,
4669                                NULL, /* original atom */
4670                                mkPCastTo( mce, tyDst, curr ), d->guard );
4671            gSz -= n;
4672            gOff += n;
4673         }
4674      }
4675   }
4676
4677   /* Outputs: memory that we write or modify.  Same comments about
4678      endianness as above apply. */
4679   if (d->mFx == Ifx_Write || d->mFx == Ifx_Modify) {
4680      toDo   = d->mSize;
4681      /* chew off 32-bit chunks */
4682      while (toDo >= 4) {
4683         do_shadow_Store( mce, end, d->mAddr, d->mSize - toDo,
4684                          NULL, /* original data */
4685                          mkPCastTo( mce, Ity_I32, curr ),
4686                          d->guard );
4687         toDo -= 4;
4688      }
4689      /* chew off 16-bit chunks */
4690      while (toDo >= 2) {
4691         do_shadow_Store( mce, end, d->mAddr, d->mSize - toDo,
4692                          NULL, /* original data */
4693                          mkPCastTo( mce, Ity_I16, curr ),
4694                          d->guard );
4695         toDo -= 2;
4696      }
4697      /* chew off the remaining 8-bit chunk, if any */
4698      if (toDo == 1) {
4699         do_shadow_Store( mce, end, d->mAddr, d->mSize - toDo,
4700                          NULL, /* original data */
4701                          mkPCastTo( mce, Ity_I8, curr ),
4702                          d->guard );
4703         toDo -= 1;
4704      }
4705      tl_assert(toDo == 0);
4706   }
4707
4708}
4709
4710
4711/* We have an ABI hint telling us that [base .. base+len-1] is to
4712   become undefined ("writable").  Generate code to call a helper to
4713   notify the A/V bit machinery of this fact.
4714
4715   We call
4716   void MC_(helperc_MAKE_STACK_UNINIT) ( Addr base, UWord len,
4717                                                    Addr nia );
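
       (Such hints are typically emitted by the guest front ends around
       call/return sequences, so that stack areas which the ABI allows
       to be trashed, for example the red zone, can be marked as
       undefined.)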
4718*/
4719static
4720void do_AbiHint ( MCEnv* mce, IRExpr* base, Int len, IRExpr* nia )
4721{
4722   IRDirty* di;
4723   /* Minor optimisation: if not doing origin tracking, ignore the
4724      supplied nia and pass zero instead.  This is on the basis that
4725      MC_(helperc_MAKE_STACK_UNINIT) will ignore it anyway, and we can
4726      almost always generate a shorter instruction to put zero into a
4727      register than any other value. */
4728   if (MC_(clo_mc_level) < 3)
4729      nia = mkIRExpr_HWord(0);
4730
4731   di = unsafeIRDirty_0_N(
4732           0/*regparms*/,
4733           "MC_(helperc_MAKE_STACK_UNINIT)",
4734           VG_(fnptr_to_fnentry)( &MC_(helperc_MAKE_STACK_UNINIT) ),
4735           mkIRExprVec_3( base, mkIRExpr_HWord( (UInt)len), nia )
4736        );
4737   stmt( 'V', mce, IRStmt_Dirty(di) );
4738}
4739
4740
4741/* ------ Dealing with IRCAS (big and complex) ------ */
4742
4743/* FWDS */
4744static IRAtom* gen_load_b  ( MCEnv* mce, Int szB,
4745                             IRAtom* baseaddr, Int offset );
4746static IRAtom* gen_maxU32  ( MCEnv* mce, IRAtom* b1, IRAtom* b2 );
4747static void    gen_store_b ( MCEnv* mce, Int szB,
4748                             IRAtom* baseaddr, Int offset, IRAtom* dataB,
4749                             IRAtom* guard );
4750
4751static void do_shadow_CAS_single ( MCEnv* mce, IRCAS* cas );
4752static void do_shadow_CAS_double ( MCEnv* mce, IRCAS* cas );
4753
4754
4755/* Either ORIG and SHADOW are both IRExpr.RdTmps, or they are both
4756   IRExpr.Consts, else this asserts.  If they are both Consts, it
4757   doesn't do anything.  So that just leaves the RdTmp case.
4758
4759   In which case: this assigns the shadow value SHADOW to the IR
4760   shadow temporary associated with ORIG.  That is, ORIG, being an
4761   original temporary, will have a shadow temporary associated with
4762   it.  However, in the case envisaged here, there will so far have
4763   been no IR emitted to actually write a shadow value into that
4764   temporary.  What this routine does is to (emit IR to) copy the
4765   value in SHADOW into said temporary, so that after this call,
4766   IRExpr.RdTmps of ORIG's shadow temp will correctly pick up the
4767   value in SHADOW.
4768
4769   Point is to allow callers to compute "by hand" a shadow value for
4770   ORIG, and force it to be associated with ORIG.
4771
4772   How do we know that the shadow associated with ORIG has not so far
4773   been assigned to?  Well, we don't know that per se; but suppose it
4774   had.  Then this routine would create a second assignment to it,
4775   and later the IR sanity checker would barf.  But that never
4776   happens.  QED.
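
       For example, do_shadow_CAS_single below computes voldLo (the V
       bits of the value the CAS loads) "by hand" via expr2vbits_Load,
       and then calls bind_shadow_tmp_to_orig('V', mce,
       mkexpr(cas->oldLo), voldLo), so that subsequent reads of oldLo's
       shadow pick up voldLo.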
4777*/
4778static void bind_shadow_tmp_to_orig ( UChar how,
4779                                      MCEnv* mce,
4780                                      IRAtom* orig, IRAtom* shadow )
4781{
4782   tl_assert(isOriginalAtom(mce, orig));
4783   tl_assert(isShadowAtom(mce, shadow));
4784   switch (orig->tag) {
4785      case Iex_Const:
4786         tl_assert(shadow->tag == Iex_Const);
4787         break;
4788      case Iex_RdTmp:
4789         tl_assert(shadow->tag == Iex_RdTmp);
4790         if (how == 'V') {
4791            assign('V', mce, findShadowTmpV(mce,orig->Iex.RdTmp.tmp),
4792                   shadow);
4793         } else {
4794            tl_assert(how == 'B');
4795            assign('B', mce, findShadowTmpB(mce,orig->Iex.RdTmp.tmp),
4796                   shadow);
4797         }
4798         break;
4799      default:
4800         tl_assert(0);
4801   }
4802}
4803
4804
4805static
4806void do_shadow_CAS ( MCEnv* mce, IRCAS* cas )
4807{
4808   /* Scheme is (both single- and double- cases):
4809
4810      1. fetch data#,dataB (the proposed new value)
4811
4812      2. fetch expd#,expdB (what we expect to see at the address)
4813
4814      3. check definedness of address
4815
4816      4. load old#,oldB from shadow memory; this also checks
4817         addressability of the address
4818
4819      5. the CAS itself
4820
4821      6. compute "expected == old".  See COMMENT_ON_CasCmpEQ below.
4822
4823      7. if "expected == old" (as computed by (6))
4824            store data#,dataB to shadow memory
4825
4826      Note that 5 reads 'old' but 4 reads 'old#'.  Similarly, 5 stores
4827      'data' but 7 stores 'data#'.  Hence it is possible for the
4828      shadow data to be incorrectly checked and/or updated:
4829
4830      * 7 is at least gated correctly, since the 'expected == old'
4831        condition is derived from outputs of 5.  However, the shadow
4832        write could happen too late: imagine after 5 we are
4833        descheduled, a different thread runs, writes a different
4834        (shadow) value at the address, and then we resume, hence
4835        overwriting the shadow value written by the other thread.
4836
4837      Because the original memory access is atomic, there's no way to
4838      make both the original and shadow accesses into a single atomic
4839      thing, hence this is unavoidable.
4840
4841      At least as Valgrind stands, I don't think it's a problem, since
4842      we're single threaded *and* we guarantee that there are no
4843      context switches during the execution of any specific superblock
4844      -- context switches can only happen at superblock boundaries.
4845
4846      If Valgrind ever becomes MT in the future, then it might be more
4847      of a problem.  A possible kludge would be to artificially
4848      associate a lock with the location, which we must acquire and
4849      release around the transaction as a whole.  Hmm, that probably
4850      wouldn't work properly since it only guards us against other
4851      threads doing CASs on the same location, not against other
4852      threads doing normal reads and writes.
4853
4854      ------------------------------------------------------------
4855
4856      COMMENT_ON_CasCmpEQ:
4857
4858      Note two things.  Firstly, in the sequence above, we compute
4859      "expected == old", but we don't check definedness of it.  Why
4860      not?  Also, the x86 and amd64 front ends use
4861      Iop_CasCmp{EQ,NE}{8,16,32,64} comparisons to make the equivalent
4862      determination (expected == old ?) for themselves, and we also
4863      don't check definedness for those primops; we just say that the
4864      result is defined.  Why?  Details follow.
4865
4866      x86/amd64 contains various forms of locked insns:
4867      * lock prefix before all basic arithmetic insns;
4868        eg lock xorl %reg1,(%reg2)
4869      * atomic exchange reg-mem
4870      * compare-and-swaps
4871
4872      Rather than attempt to represent them all, which would be a
4873      royal PITA, I used a result from Maurice Herlihy
4874      (http://en.wikipedia.org/wiki/Maurice_Herlihy), in which he
4875      demonstrates that compare-and-swap is a primitive more general
4876      than the other two, and so can be used to represent all of them.
4877      So the translation scheme for (eg) lock incl (%reg) is as
4878      follows:
4879
4880        again:
4881         old = * %reg
4882         new = old + 1
4883         atomically { if (* %reg == old) { * %reg = new } else { goto again } }
4884
4885      The "atomically" is the CAS bit.  The scheme is always the same:
4886      get old value from memory, compute new value, atomically stuff
4887      new value back in memory iff the old value has not changed (iow,
4888      no other thread modified it in the meantime).  If it has changed
4889      then we've been out-raced and we have to start over.
4890
4891      Now that's all very neat, but it has the bad side effect of
4892      introducing an explicit equality test into the translation.
4893      Consider the behaviour of said code on a memory location which
4894      is uninitialised.  We will wind up doing a comparison on
4895      uninitialised data, and mc duly complains.
4896
4897      What's difficult about this is, the common case is that the
4898      location is uncontended, and so we're usually comparing the same
4899      value (* %reg) with itself.  So we shouldn't complain even if it
4900      is undefined.  But mc doesn't know that.
4901
4902      My solution is to mark the == in the IR specially, so as to tell
4903      mc that it almost certainly compares a value with itself, and we
4904      should just regard the result as always defined.  Rather than
4905      add a bit to all IROps, I just cloned Iop_CmpEQ{8,16,32,64} into
4906      Iop_CasCmpEQ{8,16,32,64} so as not to disturb anything else.
4907
4908      So there's always the question: can this give a false
4909      negative?  eg, imagine that initially, * %reg is defined; and we
4910      read that; but then in the gap between the read and the CAS, a
4911      different thread writes an undefined (and different) value at
4912      the location.  Then the CAS in this thread will fail and we will
4913      go back to "again:", but without knowing that the trip back
4914      there was based on an undefined comparison.  No matter; at least
4915      the other thread won the race and the location is correctly
4916      marked as undefined.  What if it wrote an uninitialised version
4917      of the same value that was there originally, though?
4918
4919      etc etc.  Seems like there's a small corner case in which we
4920      might lose the fact that something's defined -- we're out-raced
4921      in between the "old = * reg" and the "atomically {", _and_ the
4922      other thread is writing in an undefined version of what's
4923      already there.  Well, that seems pretty unlikely.
4924
4925      ---
4926
4927      If we ever need to reinstate it .. code which generates a
4928      definedness test for "expected == old" was removed at r10432 of
4929      this file.
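
          ------------------------------------------------------------

          As an illustration, the overall shape generated for a single
          (non-double) CAS is roughly this (a sketch, using the names
          from do_shadow_CAS_single below):

             vdataLo     = V bits of dataLo
             vexpdLo     = V bits of expdLo
             voldLo      = shadow load of *addr    (also checks addr)
             oldLo#      = voldLo                   (bind shadow of oldLo)
             <the original CAS>
             expd_eq_old = CasCmpEQ(expdLo, oldLo)
             if (expd_eq_old) shadow-store vdataLo to *addr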
4930   */
4931   if (cas->oldHi == IRTemp_INVALID) {
4932      do_shadow_CAS_single( mce, cas );
4933   } else {
4934      do_shadow_CAS_double( mce, cas );
4935   }
4936}
4937
4938
4939static void do_shadow_CAS_single ( MCEnv* mce, IRCAS* cas )
4940{
4941   IRAtom *vdataLo = NULL, *bdataLo = NULL;
4942   IRAtom *vexpdLo = NULL, *bexpdLo = NULL;
4943   IRAtom *voldLo  = NULL, *boldLo  = NULL;
4944   IRAtom *expd_eq_old = NULL;
4945   IROp   opCasCmpEQ;
4946   Int    elemSzB;
4947   IRType elemTy;
4948   Bool   otrak = MC_(clo_mc_level) >= 3; /* a shorthand */
4949
4950   /* single CAS */
4951   tl_assert(cas->oldHi == IRTemp_INVALID);
4952   tl_assert(cas->expdHi == NULL);
4953   tl_assert(cas->dataHi == NULL);
4954
4955   elemTy = typeOfIRExpr(mce->sb->tyenv, cas->expdLo);
4956   switch (elemTy) {
4957      case Ity_I8:  elemSzB = 1; opCasCmpEQ = Iop_CasCmpEQ8;  break;
4958      case Ity_I16: elemSzB = 2; opCasCmpEQ = Iop_CasCmpEQ16; break;
4959      case Ity_I32: elemSzB = 4; opCasCmpEQ = Iop_CasCmpEQ32; break;
4960      case Ity_I64: elemSzB = 8; opCasCmpEQ = Iop_CasCmpEQ64; break;
4961      default: tl_assert(0); /* IR defn disallows any other types */
4962   }
4963
4964   /* 1. fetch data# (the proposed new value) */
4965   tl_assert(isOriginalAtom(mce, cas->dataLo));
4966   vdataLo
4967      = assignNew('V', mce, elemTy, expr2vbits(mce, cas->dataLo));
4968   tl_assert(isShadowAtom(mce, vdataLo));
4969   if (otrak) {
4970      bdataLo
4971         = assignNew('B', mce, Ity_I32, schemeE(mce, cas->dataLo));
4972      tl_assert(isShadowAtom(mce, bdataLo));
4973   }
4974
4975   /* 2. fetch expected# (what we expect to see at the address) */
4976   tl_assert(isOriginalAtom(mce, cas->expdLo));
4977   vexpdLo
4978      = assignNew('V', mce, elemTy, expr2vbits(mce, cas->expdLo));
4979   tl_assert(isShadowAtom(mce, vexpdLo));
4980   if (otrak) {
4981      bexpdLo
4982         = assignNew('B', mce, Ity_I32, schemeE(mce, cas->expdLo));
4983      tl_assert(isShadowAtom(mce, bexpdLo));
4984   }
4985
4986   /* 3. check definedness of address */
4987   /* 4. fetch old# from shadow memory; this also checks
4988         addressibility of the address */
4989   voldLo
4990      = assignNew(
4991           'V', mce, elemTy,
4992           expr2vbits_Load(
4993              mce,
4994              cas->end, elemTy, cas->addr, 0/*Addr bias*/
4995        ));
4996   bind_shadow_tmp_to_orig('V', mce, mkexpr(cas->oldLo), voldLo);
4997   if (otrak) {
4998      boldLo
4999         = assignNew('B', mce, Ity_I32,
5000                     gen_load_b(mce, elemSzB, cas->addr, 0/*addr bias*/));
5001      bind_shadow_tmp_to_orig('B', mce, mkexpr(cas->oldLo), boldLo);
5002   }
5003
5004   /* 5. the CAS itself */
5005   stmt( 'C', mce, IRStmt_CAS(cas) );
5006
5007   /* 6. compute "expected == old" */
5008   /* See COMMENT_ON_CasCmpEQ in this file for background/rationale. */
5009   /* Note that 'C' is kinda faking it; it is indeed a non-shadow
5010      tree, but it's not copied from the input block. */
5011   expd_eq_old
5012      = assignNew('C', mce, Ity_I1,
5013                  binop(opCasCmpEQ, cas->expdLo, mkexpr(cas->oldLo)));
5014
5015   /* 7. if "expected == old"
5016            store data# to shadow memory */
5017   do_shadow_Store( mce, cas->end, cas->addr, 0/*bias*/,
5018                    NULL/*data*/, vdataLo/*vdata*/,
5019                    expd_eq_old/*guard for store*/ );
5020   if (otrak) {
5021      gen_store_b( mce, elemSzB, cas->addr, 0/*offset*/,
5022                   bdataLo/*bdata*/,
5023                   expd_eq_old/*guard for store*/ );
5024   }
5025}
5026
5027
5028static void do_shadow_CAS_double ( MCEnv* mce, IRCAS* cas )
5029{
5030   IRAtom *vdataHi = NULL, *bdataHi = NULL;
5031   IRAtom *vdataLo = NULL, *bdataLo = NULL;
5032   IRAtom *vexpdHi = NULL, *bexpdHi = NULL;
5033   IRAtom *vexpdLo = NULL, *bexpdLo = NULL;
5034   IRAtom *voldHi  = NULL, *boldHi  = NULL;
5035   IRAtom *voldLo  = NULL, *boldLo  = NULL;
5036   IRAtom *xHi = NULL, *xLo = NULL, *xHL = NULL;
5037   IRAtom *expd_eq_old = NULL, *zero = NULL;
5038   IROp   opCasCmpEQ, opOr, opXor;
5039   Int    elemSzB, memOffsLo, memOffsHi;
5040   IRType elemTy;
5041   Bool   otrak = MC_(clo_mc_level) >= 3; /* a shorthand */
5042
5043   /* double CAS */
5044   tl_assert(cas->oldHi != IRTemp_INVALID);
5045   tl_assert(cas->expdHi != NULL);
5046   tl_assert(cas->dataHi != NULL);
5047
5048   elemTy = typeOfIRExpr(mce->sb->tyenv, cas->expdLo);
5049   switch (elemTy) {
5050      case Ity_I8:
5051         opCasCmpEQ = Iop_CasCmpEQ8; opOr = Iop_Or8; opXor = Iop_Xor8;
5052         elemSzB = 1; zero = mkU8(0);
5053         break;
5054      case Ity_I16:
5055         opCasCmpEQ = Iop_CasCmpEQ16; opOr = Iop_Or16; opXor = Iop_Xor16;
5056         elemSzB = 2; zero = mkU16(0);
5057         break;
5058      case Ity_I32:
5059         opCasCmpEQ = Iop_CasCmpEQ32; opOr = Iop_Or32; opXor = Iop_Xor32;
5060         elemSzB = 4; zero = mkU32(0);
5061         break;
5062      case Ity_I64:
5063         opCasCmpEQ = Iop_CasCmpEQ64; opOr = Iop_Or64; opXor = Iop_Xor64;
5064         elemSzB = 8; zero = mkU64(0);
5065         break;
5066      default:
5067         tl_assert(0); /* IR defn disallows any other types */
5068   }
5069
5070   /* 1. fetch data# (the proposed new value) */
5071   tl_assert(isOriginalAtom(mce, cas->dataHi));
5072   tl_assert(isOriginalAtom(mce, cas->dataLo));
5073   vdataHi
5074      = assignNew('V', mce, elemTy, expr2vbits(mce, cas->dataHi));
5075   vdataLo
5076      = assignNew('V', mce, elemTy, expr2vbits(mce, cas->dataLo));
5077   tl_assert(isShadowAtom(mce, vdataHi));
5078   tl_assert(isShadowAtom(mce, vdataLo));
5079   if (otrak) {
5080      bdataHi
5081         = assignNew('B', mce, Ity_I32, schemeE(mce, cas->dataHi));
5082      bdataLo
5083         = assignNew('B', mce, Ity_I32, schemeE(mce, cas->dataLo));
5084      tl_assert(isShadowAtom(mce, bdataHi));
5085      tl_assert(isShadowAtom(mce, bdataLo));
5086   }
5087
5088   /* 2. fetch expected# (what we expect to see at the address) */
5089   tl_assert(isOriginalAtom(mce, cas->expdHi));
5090   tl_assert(isOriginalAtom(mce, cas->expdLo));
5091   vexpdHi
5092      = assignNew('V', mce, elemTy, expr2vbits(mce, cas->expdHi));
5093   vexpdLo
5094      = assignNew('V', mce, elemTy, expr2vbits(mce, cas->expdLo));
5095   tl_assert(isShadowAtom(mce, vexpdHi));
5096   tl_assert(isShadowAtom(mce, vexpdLo));
5097   if (otrak) {
5098      bexpdHi
5099         = assignNew('B', mce, Ity_I32, schemeE(mce, cas->expdHi));
5100      bexpdLo
5101         = assignNew('B', mce, Ity_I32, schemeE(mce, cas->expdLo));
5102      tl_assert(isShadowAtom(mce, bexpdHi));
5103      tl_assert(isShadowAtom(mce, bexpdLo));
5104   }
5105
5106   /* 3. check definedness of address */
5107   /* 4. fetch old# from shadow memory; this also checks
5108         addressability of the address */
5109   if (cas->end == Iend_LE) {
5110      memOffsLo = 0;
5111      memOffsHi = elemSzB;
5112   } else {
5113      tl_assert(cas->end == Iend_BE);
5114      memOffsLo = elemSzB;
5115      memOffsHi = 0;
5116   }
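       /* So for a little-endian double CAS, the Lo element lives at
          addr+0 and the Hi element at addr+elemSzB; for big-endian it
          is the other way round.  These offsets serve as address
          biases both for the shadow loads just below and for the
          shadow stores at the end. */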
5117   voldHi
5118      = assignNew(
5119           'V', mce, elemTy,
5120           expr2vbits_Load(
5121              mce,
5122              cas->end, elemTy, cas->addr, memOffsHi/*Addr bias*/
5123        ));
5124   voldLo
5125      = assignNew(
5126           'V', mce, elemTy,
5127           expr2vbits_Load(
5128              mce,
5129              cas->end, elemTy, cas->addr, memOffsLo/*Addr bias*/
5130        ));
5131   bind_shadow_tmp_to_orig('V', mce, mkexpr(cas->oldHi), voldHi);
5132   bind_shadow_tmp_to_orig('V', mce, mkexpr(cas->oldLo), voldLo);
5133   if (otrak) {
5134      boldHi
5135         = assignNew('B', mce, Ity_I32,
5136                     gen_load_b(mce, elemSzB, cas->addr,
5137                                memOffsHi/*addr bias*/));
5138      boldLo
5139         = assignNew('B', mce, Ity_I32,
5140                     gen_load_b(mce, elemSzB, cas->addr,
5141                                memOffsLo/*addr bias*/));
5142      bind_shadow_tmp_to_orig('B', mce, mkexpr(cas->oldHi), boldHi);
5143      bind_shadow_tmp_to_orig('B', mce, mkexpr(cas->oldLo), boldLo);
5144   }
5145
5146   /* 5. the CAS itself */
5147   stmt( 'C', mce, IRStmt_CAS(cas) );
5148
5149   /* 6. compute "expected == old" */
5150   /* See COMMENT_ON_CasCmpEQ in this file for background/rationale. */
5151   /* Note that 'C' is kinda faking it; it is indeed a non-shadow
5152      tree, but it's not copied from the input block. */
5153   /*
5154      xHi = oldHi ^ expdHi;
5155      xLo = oldLo ^ expdLo;
5156      xHL = xHi | xLo;
5157      expd_eq_old = xHL == 0;
5158   */
5159   xHi = assignNew('C', mce, elemTy,
5160                   binop(opXor, cas->expdHi, mkexpr(cas->oldHi)));
5161   xLo = assignNew('C', mce, elemTy,
5162                   binop(opXor, cas->expdLo, mkexpr(cas->oldLo)));
5163   xHL = assignNew('C', mce, elemTy,
5164                   binop(opOr, xHi, xLo));
5165   expd_eq_old
5166      = assignNew('C', mce, Ity_I1,
5167                  binop(opCasCmpEQ, xHL, zero));
5168
5169   /* 7. if "expected == old"
5170            store data# to shadow memory */
5171   do_shadow_Store( mce, cas->end, cas->addr, memOffsHi/*bias*/,
5172                    NULL/*data*/, vdataHi/*vdata*/,
5173                    expd_eq_old/*guard for store*/ );
5174   do_shadow_Store( mce, cas->end, cas->addr, memOffsLo/*bias*/,
5175                    NULL/*data*/, vdataLo/*vdata*/,
5176                    expd_eq_old/*guard for store*/ );
5177   if (otrak) {
5178      gen_store_b( mce, elemSzB, cas->addr, memOffsHi/*offset*/,
5179                   bdataHi/*bdata*/,
5180                   expd_eq_old/*guard for store*/ );
5181      gen_store_b( mce, elemSzB, cas->addr, memOffsLo/*offset*/,
5182                   bdataLo/*bdata*/,
5183                   expd_eq_old/*guard for store*/ );
5184   }
5185}
5186
5187
5188/* ------ Dealing with LL/SC (not difficult) ------ */
5189
5190static void do_shadow_LLSC ( MCEnv*    mce,
5191                             IREndness stEnd,
5192                             IRTemp    stResult,
5193                             IRExpr*   stAddr,
5194                             IRExpr*   stStoredata )
5195{
5196   /* In short: treat a load-linked like a normal load followed by an
5197      assignment of the loaded (shadow) data to the result temporary.
5198      Treat a store-conditional like a normal store, and mark the
5199      result temporary as defined. */
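       /* (Such LL/SC pairs typically come from instructions like ARM's
          ldrex/strex or PowerPC's lwarx/stwcx., which the front ends
          present as two separate Ist_LLSC statements.) */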
5200   IRType resTy  = typeOfIRTemp(mce->sb->tyenv, stResult);
5201   IRTemp resTmp = findShadowTmpV(mce, stResult);
5202
5203   tl_assert(isIRAtom(stAddr));
5204   if (stStoredata)
5205      tl_assert(isIRAtom(stStoredata));
5206
5207   if (stStoredata == NULL) {
5208      /* Load Linked */
5209      /* Just treat this as a normal load, followed by an assignment of
5210         the value to .result. */
5211      /* Stay sane */
5212      tl_assert(resTy == Ity_I64 || resTy == Ity_I32
5213                || resTy == Ity_I16 || resTy == Ity_I8);
5214      assign( 'V', mce, resTmp,
5215                   expr2vbits_Load(
5216                      mce, stEnd, resTy, stAddr, 0/*addr bias*/));
5217   } else {
5218      /* Store Conditional */
5219      /* Stay sane */
5220      IRType dataTy = typeOfIRExpr(mce->sb->tyenv,
5221                                   stStoredata);
5222      tl_assert(dataTy == Ity_I64 || dataTy == Ity_I32
5223                || dataTy == Ity_I16 || dataTy == Ity_I8);
5224      do_shadow_Store( mce, stEnd,
5225                            stAddr, 0/* addr bias */,
5226                            stStoredata,
5227                            NULL /* shadow data */,
5228                            NULL/*guard*/ );
5229      /* This is a store conditional, so it writes to .result a value
5230         indicating whether or not the store succeeded.  Just claim
5231         this value is always defined.  In the PowerPC interpretation
5232         of store-conditional, definedness of the success indication
5233         depends on whether the address of the store matches the
5234         reservation address.  But we can't tell that here (and
5235         anyway, we're not being PowerPC-specific).  At least we are
5236         guaranteed that the definedness of the store address, and its
5237         addressability, will be checked as per normal.  So it seems
5238         pretty safe to just say that the success indication is always
5239         defined.
5240
5241         In schemeS, for origin tracking, we must correspondingly set
5242         a no-origin value for the origin shadow of .result.
5243      */
5244      tl_assert(resTy == Ity_I1);
5245      assign( 'V', mce, resTmp, definedOfType(resTy) );
5246   }
5247}
5248
5249
5250/*------------------------------------------------------------*/
5251/*--- Memcheck main                                        ---*/
5252/*------------------------------------------------------------*/
5253
5254static void schemeS ( MCEnv* mce, IRStmt* st );
5255
5256static Bool isBogusAtom ( IRAtom* at )
5257{
5258   ULong n = 0;
5259   IRConst* con;
5260   tl_assert(isIRAtom(at));
5261   if (at->tag == Iex_RdTmp)
5262      return False;
5263   tl_assert(at->tag == Iex_Const);
5264   con = at->Iex.Const.con;
5265   switch (con->tag) {
5266      case Ico_U1:   return False;
5267      case Ico_U8:   n = (ULong)con->Ico.U8; break;
5268      case Ico_U16:  n = (ULong)con->Ico.U16; break;
5269      case Ico_U32:  n = (ULong)con->Ico.U32; break;
5270      case Ico_U64:  n = (ULong)con->Ico.U64; break;
5271      case Ico_F64:  return False;
5272      case Ico_F32i: return False;
5273      case Ico_F64i: return False;
5274      case Ico_V128: return False;
5275      default: ppIRExpr(at); tl_assert(0);
5276   }
5277   /* VG_(printf)("%llx\n", n); */
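       /* These look like the magic constants used by word-at-a-time
          string operations, eg the classic strlen test
          ((x - 0x01010101) & ~x & 0x80808080) for spotting a zero
          byte, which legitimately operate on partially undefined
          words.  Seeing one of them is taken as a hint that the more
          expensive, more precise instrumentation is worth enabling,
          so as to avoid false positives. */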
5278   return (/*32*/    n == 0xFEFEFEFFULL
5279           /*32*/ || n == 0x80808080ULL
5280           /*32*/ || n == 0x7F7F7F7FULL
5281           /*64*/ || n == 0xFFFFFFFFFEFEFEFFULL
5282           /*64*/ || n == 0xFEFEFEFEFEFEFEFFULL
5283           /*64*/ || n == 0x0000000000008080ULL
5284           /*64*/ || n == 0x8080808080808080ULL
5285           /*64*/ || n == 0x0101010101010101ULL
5286          );
5287}
5288
5289static Bool checkForBogusLiterals ( /*FLAT*/ IRStmt* st )
5290{
5291   Int      i;
5292   IRExpr*  e;
5293   IRDirty* d;
5294   IRCAS*   cas;
5295   switch (st->tag) {
5296      case Ist_WrTmp:
5297         e = st->Ist.WrTmp.data;
5298         switch (e->tag) {
5299            case Iex_Get:
5300            case Iex_RdTmp:
5301               return False;
5302            case Iex_Const:
5303               return isBogusAtom(e);
5304            case Iex_Unop:
5305               return isBogusAtom(e->Iex.Unop.arg);
5306            case Iex_GetI:
5307               return isBogusAtom(e->Iex.GetI.ix);
5308            case Iex_Binop:
5309               return isBogusAtom(e->Iex.Binop.arg1)
5310                      || isBogusAtom(e->Iex.Binop.arg2);
5311            case Iex_Triop:
5312               return isBogusAtom(e->Iex.Triop.details->arg1)
5313                      || isBogusAtom(e->Iex.Triop.details->arg2)
5314                      || isBogusAtom(e->Iex.Triop.details->arg3);
5315            case Iex_Qop:
5316               return isBogusAtom(e->Iex.Qop.details->arg1)
5317                      || isBogusAtom(e->Iex.Qop.details->arg2)
5318                      || isBogusAtom(e->Iex.Qop.details->arg3)
5319                      || isBogusAtom(e->Iex.Qop.details->arg4);
5320            case Iex_Mux0X:
5321               return isBogusAtom(e->Iex.Mux0X.cond)
5322                      || isBogusAtom(e->Iex.Mux0X.expr0)
5323                      || isBogusAtom(e->Iex.Mux0X.exprX);
5324            case Iex_Load:
5325               return isBogusAtom(e->Iex.Load.addr);
5326            case Iex_CCall:
5327               for (i = 0; e->Iex.CCall.args[i]; i++)
5328                  if (isBogusAtom(e->Iex.CCall.args[i]))
5329                     return True;
5330               return False;
5331            default:
5332               goto unhandled;
5333         }
5334      case Ist_Dirty:
5335         d = st->Ist.Dirty.details;
5336         for (i = 0; d->args[i]; i++)
5337            if (isBogusAtom(d->args[i]))
5338               return True;
5339         if (d->guard && isBogusAtom(d->guard))
5340            return True;
5341         if (d->mAddr && isBogusAtom(d->mAddr))
5342            return True;
5343         return False;
5344      case Ist_Put:
5345         return isBogusAtom(st->Ist.Put.data);
5346      case Ist_PutI:
5347         return isBogusAtom(st->Ist.PutI.details->ix)
5348                || isBogusAtom(st->Ist.PutI.details->data);
5349      case Ist_Store:
5350         return isBogusAtom(st->Ist.Store.addr)
5351                || isBogusAtom(st->Ist.Store.data);
5352      case Ist_Exit:
5353         return isBogusAtom(st->Ist.Exit.guard);
5354      case Ist_AbiHint:
5355         return isBogusAtom(st->Ist.AbiHint.base)
5356                || isBogusAtom(st->Ist.AbiHint.nia);
5357      case Ist_NoOp:
5358      case Ist_IMark:
5359      case Ist_MBE:
5360         return False;
5361      case Ist_CAS:
5362         cas = st->Ist.CAS.details;
5363         return isBogusAtom(cas->addr)
5364                || (cas->expdHi ? isBogusAtom(cas->expdHi) : False)
5365                || isBogusAtom(cas->expdLo)
5366                || (cas->dataHi ? isBogusAtom(cas->dataHi) : False)
5367                || isBogusAtom(cas->dataLo);
5368      case Ist_LLSC:
5369         return isBogusAtom(st->Ist.LLSC.addr)
5370                || (st->Ist.LLSC.storedata
5371                       ? isBogusAtom(st->Ist.LLSC.storedata)
5372                       : False);
5373      default:
5374      unhandled:
5375         ppIRStmt(st);
5376         VG_(tool_panic)("hasBogusLiterals");
5377   }
5378}
5379
5380
5381IRSB* MC_(instrument) ( VgCallbackClosure* closure,
5382                        IRSB* sb_in,
5383                        VexGuestLayout* layout,
5384                        VexGuestExtents* vge,
5385                        VexArchInfo* archinfo_host,
5386                        IRType gWordTy, IRType hWordTy )
5387{
5388   Bool    verboze = 0||False;
5389   Bool    bogus;
5390   Int     i, j, first_stmt;
5391   IRStmt* st;
5392   MCEnv   mce;
5393   IRSB*   sb_out;
5394
5395   if (gWordTy != hWordTy) {
5396      /* We don't currently support this case. */
5397      VG_(tool_panic)("host/guest word size mismatch");
5398   }
5399
5400   /* Check we're not completely nuts */
5401   tl_assert(sizeof(UWord)  == sizeof(void*));
5402   tl_assert(sizeof(Word)   == sizeof(void*));
5403   tl_assert(sizeof(Addr)   == sizeof(void*));
5404   tl_assert(sizeof(ULong)  == 8);
5405   tl_assert(sizeof(Long)   == 8);
5406   tl_assert(sizeof(Addr64) == 8);
5407   tl_assert(sizeof(UInt)   == 4);
5408   tl_assert(sizeof(Int)    == 4);
5409
5410   tl_assert(MC_(clo_mc_level) >= 1 && MC_(clo_mc_level) <= 3);
5411
5412   /* Set up SB */
5413   sb_out = deepCopyIRSBExceptStmts(sb_in);
5414
5415   /* Set up the running environment.  Both .sb and .tmpMap are
5416      modified as we go along.  Note that tmps are added to both
5417      .sb->tyenv and .tmpMap together, so the valid index-set for
5418      those two arrays should always be identical. */
5419   VG_(memset)(&mce, 0, sizeof(mce));
5420   mce.sb             = sb_out;
5421   mce.trace          = verboze;
5422   mce.layout         = layout;
5423   mce.hWordTy        = hWordTy;
5424   mce.bogusLiterals  = False;
5425
5426   /* Do expensive interpretation for Iop_Add32 and Iop_Add64 on
5427      Darwin.  10.7 is mostly built with LLVM, which uses these for
5428      bitfield inserts, and we get a lot of false errors if the cheap
5429      interpretation is used, alas.  Could solve this much better if
5430      we knew which of such adds came from x86/amd64 LEA instructions,
5431      since these are the only ones really needing the expensive
5432      interpretation, but that would require some way to tag them in
5433      the _toIR.c front ends, which is a lot of faffing around.  So
5434      for now just use the slow and blunt-instrument solution. */
5435   mce.useLLVMworkarounds = False;
5436#  if defined(VGO_darwin)
5437   mce.useLLVMworkarounds = True;
5438#  endif
5439
5440   mce.tmpMap = VG_(newXA)( VG_(malloc), "mc.MC_(instrument).1", VG_(free),
5441                            sizeof(TempMapEnt));
5442   for (i = 0; i < sb_in->tyenv->types_used; i++) {
5443      TempMapEnt ent;
5444      ent.kind    = Orig;
5445      ent.shadowV = IRTemp_INVALID;
5446      ent.shadowB = IRTemp_INVALID;
5447      VG_(addToXA)( mce.tmpMap, &ent );
5448   }
5449   tl_assert( VG_(sizeXA)( mce.tmpMap ) == sb_in->tyenv->types_used );
5450
5451   /* Make a preliminary inspection of the statements, to see if there
5452      are any dodgy-looking literals.  If there are, we generate
5453      extra-detailed (hence extra-expensive) instrumentation in
5454      places.  Scan the whole bb even if dodginess is found earlier,
5455      so that the flatness assertion is applied to all stmts. */
5456
5457   bogus = False;
5458
5459   for (i = 0; i < sb_in->stmts_used; i++) {
5460
5461      st = sb_in->stmts[i];
5462      tl_assert(st);
5463      tl_assert(isFlatIRStmt(st));
5464
5465      if (!bogus) {
5466         bogus = checkForBogusLiterals(st);
5467         if (0 && bogus) {
5468            VG_(printf)("bogus: ");
5469            ppIRStmt(st);
5470            VG_(printf)("\n");
5471         }
5472      }
5473
5474   }
5475
5476   mce.bogusLiterals = bogus;
5477
5478   /* Copy verbatim any IR preamble preceding the first IMark */
5479
5480   tl_assert(mce.sb == sb_out);
5481   tl_assert(mce.sb != sb_in);
5482
5483   i = 0;
5484   while (i < sb_in->stmts_used && sb_in->stmts[i]->tag != Ist_IMark) {
5485
5486      st = sb_in->stmts[i];
5487      tl_assert(st);
5488      tl_assert(isFlatIRStmt(st));
5489
5490      stmt( 'C', &mce, sb_in->stmts[i] );
5491      i++;
5492   }
5493
5494   /* Nasty problem.  IR optimisation of the pre-instrumented IR may
5495      cause the IR following the preamble to contain references to IR
5496      temporaries defined in the preamble.  Because the preamble isn't
5497      instrumented, these temporaries don't have any shadows.
5498      Nevertheless uses of them following the preamble will cause
5499      memcheck to generate references to their shadows.  End effect is
5500      to cause IR sanity check failures, due to references to
5501      non-existent shadows.  This is only evident for the complex
5502      preambles used for function wrapping on TOC-afflicted platforms
5503      (ppc64-linux).
5504
5505      The following loop therefore scans the preamble looking for
5506      assignments to temporaries.  For each one found it creates an
5507      assignment to the corresponding (V) shadow temp, marking it as
5508      'defined'.  This is the same resulting IR as if the main
5509      instrumentation loop before had been applied to the statement
5510      'tmp = CONSTANT'.
5511
5512      Similarly, if origin tracking is enabled, we must generate an
5513      assignment for the corresponding origin (B) shadow, claiming
5514      no-origin, as appropriate for a defined value.
5515   */
5516   for (j = 0; j < i; j++) {
5517      if (sb_in->stmts[j]->tag == Ist_WrTmp) {
5518         /* findShadowTmpV checks its arg is an original tmp;
5519            no need to assert that here. */
5520         IRTemp tmp_o = sb_in->stmts[j]->Ist.WrTmp.tmp;
5521         IRTemp tmp_v = findShadowTmpV(&mce, tmp_o);
5522         IRType ty_v  = typeOfIRTemp(sb_out->tyenv, tmp_v);
5523         assign( 'V', &mce, tmp_v, definedOfType( ty_v ) );
5524         if (MC_(clo_mc_level) == 3) {
5525            IRTemp tmp_b = findShadowTmpB(&mce, tmp_o);
5526            tl_assert(typeOfIRTemp(sb_out->tyenv, tmp_b) == Ity_I32);
5527            assign( 'B', &mce, tmp_b, mkU32(0)/* UNKNOWN ORIGIN */);
5528         }
5529         if (0) {
5530            VG_(printf)("create shadow tmp(s) for preamble tmp [%d] ty ", j);
5531            ppIRType( ty_v );
5532            VG_(printf)("\n");
5533         }
5534      }
5535   }
5536
5537   /* Iterate over the remaining stmts to generate instrumentation. */
5538
5539   tl_assert(sb_in->stmts_used > 0);
5540   tl_assert(i >= 0);
5541   tl_assert(i < sb_in->stmts_used);
5542   tl_assert(sb_in->stmts[i]->tag == Ist_IMark);
5543
5544   for (/* use current i*/; i < sb_in->stmts_used; i++) {
5545
5546      st = sb_in->stmts[i];
5547      first_stmt = sb_out->stmts_used;
5548
5549      if (verboze) {
5550         VG_(printf)("\n");
5551         ppIRStmt(st);
5552         VG_(printf)("\n");
5553      }
5554
5555      if (MC_(clo_mc_level) == 3) {
5556         /* See comments on case Ist_CAS below. */
5557         if (st->tag != Ist_CAS)
5558            schemeS( &mce, st );
5559      }
5560
5561      /* Generate instrumentation code for each stmt ... */
5562
5563      switch (st->tag) {
5564
5565         case Ist_WrTmp:
5566            assign( 'V', &mce, findShadowTmpV(&mce, st->Ist.WrTmp.tmp),
5567                               expr2vbits( &mce, st->Ist.WrTmp.data) );
5568            break;
5569
5570         case Ist_Put:
5571            do_shadow_PUT( &mce,
5572                           st->Ist.Put.offset,
5573                           st->Ist.Put.data,
5574                           NULL /* shadow atom */, NULL /* guard */ );
5575            break;
5576
5577         case Ist_PutI:
5578            do_shadow_PUTI( &mce, st->Ist.PutI.details);
5579            break;
5580
5581         case Ist_Store:
5582            do_shadow_Store( &mce, st->Ist.Store.end,
5583                                   st->Ist.Store.addr, 0/* addr bias */,
5584                                   st->Ist.Store.data,
5585                                   NULL /* shadow data */,
5586                                   NULL/*guard*/ );
5587            break;
5588
5589         case Ist_Exit:
5590            complainIfUndefined( &mce, st->Ist.Exit.guard, NULL );
5591            break;
5592
5593         case Ist_IMark:
5594            break;
5595
5596         case Ist_NoOp:
5597         case Ist_MBE:
5598            break;
5599
5600         case Ist_Dirty:
5601            do_shadow_Dirty( &mce, st->Ist.Dirty.details );
5602            break;
5603
5604         case Ist_AbiHint:
5605            do_AbiHint( &mce, st->Ist.AbiHint.base,
5606                              st->Ist.AbiHint.len,
5607                              st->Ist.AbiHint.nia );
5608            break;
5609
5610         case Ist_CAS:
5611            do_shadow_CAS( &mce, st->Ist.CAS.details );
5612            /* Note, do_shadow_CAS copies the CAS itself to the output
5613               block, because it needs to add instrumentation both
5614               before and after it.  Hence skip the copy below.  Also
5615               skip the origin-tracking stuff (call to schemeS) above,
5616               since that's all tangled up with it too; do_shadow_CAS
5617               does it all. */
5618            break;
5619
5620         case Ist_LLSC:
5621            do_shadow_LLSC( &mce,
5622                            st->Ist.LLSC.end,
5623                            st->Ist.LLSC.result,
5624                            st->Ist.LLSC.addr,
5625                            st->Ist.LLSC.storedata );
5626            break;
5627
5628         default:
5629            VG_(printf)("\n");
5630            ppIRStmt(st);
5631            VG_(printf)("\n");
5632            VG_(tool_panic)("memcheck: unhandled IRStmt");
5633
5634      } /* switch (st->tag) */
5635
5636      if (0 && verboze) {
5637         for (j = first_stmt; j < sb_out->stmts_used; j++) {
5638            VG_(printf)("   ");
5639            ppIRStmt(sb_out->stmts[j]);
5640            VG_(printf)("\n");
5641         }
5642         VG_(printf)("\n");
5643      }
5644
5645      /* ... and finally copy the stmt itself to the output.  Except,
5646         skip the copy of IRCASs; see comments on case Ist_CAS
5647         above. */
5648      if (st->tag != Ist_CAS)
5649         stmt('C', &mce, st);
5650   }
5651
5652   /* Now we need to complain if the jump target is undefined. */
5653   first_stmt = sb_out->stmts_used;
5654
5655   if (verboze) {
5656      VG_(printf)("sb_in->next = ");
5657      ppIRExpr(sb_in->next);
5658      VG_(printf)("\n\n");
5659   }
5660
5661   complainIfUndefined( &mce, sb_in->next, NULL );
5662
5663   if (0 && verboze) {
5664      for (j = first_stmt; j < sb_out->stmts_used; j++) {
5665         VG_(printf)("   ");
5666         ppIRStmt(sb_out->stmts[j]);
5667         VG_(printf)("\n");
5668      }
5669      VG_(printf)("\n");
5670   }
5671
5672   /* If this fails, there's been some serious snafu with tmp management,
5673      which should be investigated. */
5674   tl_assert( VG_(sizeXA)( mce.tmpMap ) == mce.sb->tyenv->types_used );
5675   VG_(deleteXA)( mce.tmpMap );
5676
5677   tl_assert(mce.sb == sb_out);
5678   return sb_out;
5679}
5680
5681/*------------------------------------------------------------*/
5682/*--- Post-tree-build final tidying                        ---*/
5683/*------------------------------------------------------------*/
5684
5685/* This exploits the observation that Memcheck often produces
5686   repeated conditional calls of the form
5687
5688   Dirty G MC_(helperc_value_check0/1/4/8_fail)(UInt otag)
5689
5690   with the same guard expression G guarding the same helper call.
5691   The second and subsequent calls are redundant.  This usually
5692   results from instrumentation of guest code containing multiple
5693   memory references at different constant offsets from the same base
5694   register.  After optimisation of the instrumentation, you get a
5695   test for the definedness of the base register for each memory
5696   reference, which is kinda pointless.  MC_(final_tidy) therefore
5697   looks for such repeated calls and removes all but the first. */
5698
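    /* For example, two consecutive statements of the form

          if (t_guard) call MC_(helperc_value_check8_fail_no_o)()
          if (t_guard) call MC_(helperc_value_check8_fail_no_o)()

       (a sketch, not exact IR syntax) are reduced to just the first;
       the second is overwritten with an Ist_NoOp. */
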
5699/* A struct for recording which (helper, guard) pairs we have already
5700   seen. */
5701typedef
5702   struct { void* entry; IRExpr* guard; }
5703   Pair;
5704
5705/* Return True if e1 and e2 definitely denote the same value (used to
5706   compare guards).  Return False if unknown; False is the safe
5707   answer.  Since guest registers and guest memory do not have the
5708   SSA property we must return False if any Gets or Loads appear in
5709   the expression. */
5710
5711static Bool sameIRValue ( IRExpr* e1, IRExpr* e2 )
5712{
5713   if (e1->tag != e2->tag)
5714      return False;
5715   switch (e1->tag) {
5716      case Iex_Const:
5717         return eqIRConst( e1->Iex.Const.con, e2->Iex.Const.con );
5718      case Iex_Binop:
5719         return e1->Iex.Binop.op == e2->Iex.Binop.op
5720                && sameIRValue(e1->Iex.Binop.arg1, e2->Iex.Binop.arg1)
5721                && sameIRValue(e1->Iex.Binop.arg2, e2->Iex.Binop.arg2);
5722      case Iex_Unop:
5723         return e1->Iex.Unop.op == e2->Iex.Unop.op
5724                && sameIRValue(e1->Iex.Unop.arg, e2->Iex.Unop.arg);
5725      case Iex_RdTmp:
5726         return e1->Iex.RdTmp.tmp == e2->Iex.RdTmp.tmp;
5727      case Iex_Mux0X:
5728         return sameIRValue( e1->Iex.Mux0X.cond, e2->Iex.Mux0X.cond )
5729                && sameIRValue( e1->Iex.Mux0X.expr0, e2->Iex.Mux0X.expr0 )
5730                && sameIRValue( e1->Iex.Mux0X.exprX, e2->Iex.Mux0X.exprX );
5731      case Iex_Qop:
5732      case Iex_Triop:
5733      case Iex_CCall:
5734         /* be lazy.  Could define equality for these, but they never
5735            appear to be used. */
5736         return False;
5737      case Iex_Get:
5738      case Iex_GetI:
5739      case Iex_Load:
5740         /* be conservative - these may not give the same value each
5741            time */
5742         return False;
5743      case Iex_Binder:
5744         /* should never see this */
5745         /* fallthrough */
5746      default:
5747         VG_(printf)("mc_translate.c: sameIRValue: unhandled: ");
5748         ppIRExpr(e1);
5749         VG_(tool_panic)("memcheck:sameIRValue");
5750         return False;
5751   }
5752}
5753
5754/* See if 'pairs' already has an entry for (entry, guard).  Return
5755   True if so.  If not, add an entry. */
5756
5757static
5758Bool check_or_add ( XArray* /*of Pair*/ pairs, IRExpr* guard, void* entry )
5759{
5760   Pair  p;
5761   Pair* pp;
5762   Int   i, n = VG_(sizeXA)( pairs );
5763   for (i = 0; i < n; i++) {
5764      pp = VG_(indexXA)( pairs, i );
5765      if (pp->entry == entry && sameIRValue(pp->guard, guard))
5766         return True;
5767   }
5768   p.guard = guard;
5769   p.entry = entry;
5770   VG_(addToXA)( pairs, &p );
5771   return False;
5772}
5773
5774static Bool is_helperc_value_checkN_fail ( const HChar* name )
5775{
5776   return
5777      0==VG_(strcmp)(name, "MC_(helperc_value_check0_fail_no_o)")
5778      || 0==VG_(strcmp)(name, "MC_(helperc_value_check1_fail_no_o)")
5779      || 0==VG_(strcmp)(name, "MC_(helperc_value_check4_fail_no_o)")
5780      || 0==VG_(strcmp)(name, "MC_(helperc_value_check8_fail_no_o)")
5781      || 0==VG_(strcmp)(name, "MC_(helperc_value_check0_fail_w_o)")
5782      || 0==VG_(strcmp)(name, "MC_(helperc_value_check1_fail_w_o)")
5783      || 0==VG_(strcmp)(name, "MC_(helperc_value_check4_fail_w_o)")
5784      || 0==VG_(strcmp)(name, "MC_(helperc_value_check8_fail_w_o)");
5785}
5786
5787IRSB* MC_(final_tidy) ( IRSB* sb_in )
5788{
5789   Int i;
5790   IRStmt*   st;
5791   IRDirty*  di;
5792   IRExpr*   guard;
5793   IRCallee* cee;
5794   Bool      alreadyPresent;
5795   XArray*   pairs = VG_(newXA)( VG_(malloc), "mc.ft.1",
5796                                 VG_(free), sizeof(Pair) );
5797   /* Scan forwards through the statements.  Each time a call to one
5798      of the relevant helpers is seen, check if we have made a
5799      previous call to the same helper using the same guard
5800      expression, and if so, delete the call. */
5801   for (i = 0; i < sb_in->stmts_used; i++) {
5802      st = sb_in->stmts[i];
5803      tl_assert(st);
5804      if (st->tag != Ist_Dirty)
5805         continue;
5806      di = st->Ist.Dirty.details;
5807      guard = di->guard;
5808      if (!guard)
5809         continue;
5810      if (0) { ppIRExpr(guard); VG_(printf)("\n"); }
5811      cee = di->cee;
5812      if (!is_helperc_value_checkN_fail( cee->name ))
5813         continue;
5814      /* Ok, we have a call to helperc_value_check0/1/4/8_fail with
5815         guard 'guard'.  Check if we have already seen a call to this
5816         function with the same guard.  If so, delete it.  If not,
5817         add it to the set of calls we do know about. */
5818      alreadyPresent = check_or_add( pairs, guard, cee->addr );
5819      if (alreadyPresent) {
5820         sb_in->stmts[i] = IRStmt_NoOp();
5821         if (0) VG_(printf)("XX\n");
5822      }
5823   }
5824   VG_(deleteXA)( pairs );
5825   return sb_in;
5826}
5827
5828
5829/*------------------------------------------------------------*/
5830/*--- Origin tracking stuff                                ---*/
5831/*------------------------------------------------------------*/
5832
5833/* Almost identical to findShadowTmpV. */
5834static IRTemp findShadowTmpB ( MCEnv* mce, IRTemp orig )
5835{
5836   TempMapEnt* ent;
5837   /* VG_(indexXA) range-checks 'orig', hence no need to check
5838      here. */
5839   ent = (TempMapEnt*)VG_(indexXA)( mce->tmpMap, (Word)orig );
5840   tl_assert(ent->kind == Orig);
5841   if (ent->shadowB == IRTemp_INVALID) {
5842      IRTemp tmpB
5843        = newTemp( mce, Ity_I32, BSh );
5844      /* newTemp may cause mce->tmpMap to resize, hence previous results
5845         from VG_(indexXA) are invalid. */
5846      ent = (TempMapEnt*)VG_(indexXA)( mce->tmpMap, (Word)orig );
5847      tl_assert(ent->kind == Orig);
5848      tl_assert(ent->shadowB == IRTemp_INVALID);
5849      ent->shadowB = tmpB;
5850   }
5851   return ent->shadowB;
5852}
5853
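    /* Combine two origin tags.  Tags are 32-bit values in which zero
       means "no/unknown origin"; combining with an unsigned max gives
       a deterministic choice of one of the contributing origins, and
       yields zero only when both inputs are zero. */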
5854static IRAtom* gen_maxU32 ( MCEnv* mce, IRAtom* b1, IRAtom* b2 )
5855{
5856   return assignNew( 'B', mce, Ity_I32, binop(Iop_Max32U, b1, b2) );
5857}
5858
5859static IRAtom* gen_load_b ( MCEnv* mce, Int szB,
5860                            IRAtom* baseaddr, Int offset )
5861{
5862   void*    hFun;
5863   const HChar* hName;
5864   IRTemp   bTmp;
5865   IRDirty* di;
5866   IRType   aTy   = typeOfIRExpr( mce->sb->tyenv, baseaddr );
5867   IROp     opAdd = aTy == Ity_I32 ? Iop_Add32 : Iop_Add64;
5868   IRAtom*  ea    = baseaddr;
5869   if (offset != 0) {
5870      IRAtom* off = aTy == Ity_I32 ? mkU32( offset )
5871                                   : mkU64( (Long)(Int)offset );
5872      ea = assignNew( 'B', mce, aTy, binop(opAdd, ea, off));
5873   }
5874   bTmp = newTemp(mce, mce->hWordTy, BSh);
5875
5876   switch (szB) {
5877      case 1: hFun  = (void*)&MC_(helperc_b_load1);
5878              hName = "MC_(helperc_b_load1)";
5879              break;
5880      case 2: hFun  = (void*)&MC_(helperc_b_load2);
5881              hName = "MC_(helperc_b_load2)";
5882              break;
5883      case 4: hFun  = (void*)&MC_(helperc_b_load4);
5884              hName = "MC_(helperc_b_load4)";
5885              break;
5886      case 8: hFun  = (void*)&MC_(helperc_b_load8);
5887              hName = "MC_(helperc_b_load8)";
5888              break;
5889      case 16: hFun  = (void*)&MC_(helperc_b_load16);
5890               hName = "MC_(helperc_b_load16)";
5891               break;
5892      case 32: hFun  = (void*)&MC_(helperc_b_load32);
5893               hName = "MC_(helperc_b_load32)";
5894               break;
5895      default:
5896         VG_(printf)("mc_translate.c: gen_load_b: unhandled szB == %d\n", szB);
5897         tl_assert(0);
5898   }
5899   di = unsafeIRDirty_1_N(
5900           bTmp, 1/*regparms*/, hName, VG_(fnptr_to_fnentry)( hFun ),
5901           mkIRExprVec_1( ea )
5902        );
5903   /* no need to mess with any annotations.  This call accesses
5904      neither guest state nor guest memory. */
5905   stmt( 'B', mce, IRStmt_Dirty(di) );
5906   if (mce->hWordTy == Ity_I64) {
5907      /* 64-bit host */
5908      IRTemp bTmp32 = newTemp(mce, Ity_I32, BSh);
5909      assign( 'B', mce, bTmp32, unop(Iop_64to32, mkexpr(bTmp)) );
5910      return mkexpr(bTmp32);
5911   } else {
5912      /* 32-bit host */
5913      return mkexpr(bTmp);
5914   }
5915}
5916
5917static IRAtom* gen_guarded_load_b ( MCEnv* mce, Int szB, IRAtom* baseaddr,
5918                                    Int offset, IRAtom* guard )
5919{
5920  if (guard) {
5921     IRAtom *cond, *iffalse, *iftrue;
5922
5923     cond    = assignNew('B', mce, Ity_I8, unop(Iop_1Uto8, guard));
5924     iftrue  = assignNew('B', mce, Ity_I32,
5925                         gen_load_b(mce, szB, baseaddr, offset));
5926     iffalse = mkU32(0);
5927
5928     return assignNew('B', mce, Ity_I32, IRExpr_Mux0X(cond, iffalse, iftrue));
5929  }
5930
5931  return gen_load_b(mce, szB, baseaddr, offset);
5932}
5933
5934/* Generate a shadow store.  guard :: Ity_I1 controls whether the
5935   store really happens; NULL means it unconditionally does. */
5936static void gen_store_b ( MCEnv* mce, Int szB,
5937                          IRAtom* baseaddr, Int offset, IRAtom* dataB,
5938                          IRAtom* guard )
5939{
5940   void*    hFun;
5941   const HChar* hName;
5942   IRDirty* di;
5943   IRType   aTy   = typeOfIRExpr( mce->sb->tyenv, baseaddr );
5944   IROp     opAdd = aTy == Ity_I32 ? Iop_Add32 : Iop_Add64;
5945   IRAtom*  ea    = baseaddr;
5946   if (guard) {
5947      tl_assert(isOriginalAtom(mce, guard));
5948      tl_assert(typeOfIRExpr(mce->sb->tyenv, guard) == Ity_I1);
5949   }
5950   if (offset != 0) {
5951      IRAtom* off = aTy == Ity_I32 ? mkU32( offset )
5952                                   : mkU64( (Long)(Int)offset );
5953      ea = assignNew(  'B', mce, aTy, binop(opAdd, ea, off));
5954   }
5955   if (mce->hWordTy == Ity_I64)
5956      dataB = assignNew( 'B', mce, Ity_I64, unop(Iop_32Uto64, dataB));
5957
5958   switch (szB) {
5959      case 1: hFun  = (void*)&MC_(helperc_b_store1);
5960              hName = "MC_(helperc_b_store1)";
5961              break;
5962      case 2: hFun  = (void*)&MC_(helperc_b_store2);
5963              hName = "MC_(helperc_b_store2)";
5964              break;
5965      case 4: hFun  = (void*)&MC_(helperc_b_store4);
5966              hName = "MC_(helperc_b_store4)";
5967              break;
5968      case 8: hFun  = (void*)&MC_(helperc_b_store8);
5969              hName = "MC_(helperc_b_store8)";
5970              break;
5971      case 16: hFun  = (void*)&MC_(helperc_b_store16);
5972               hName = "MC_(helperc_b_store16)";
5973               break;
5974      case 32: hFun  = (void*)&MC_(helperc_b_store32);
5975               hName = "MC_(helperc_b_store32)";
5976               break;
5977      default:
5978         tl_assert(0);
5979   }
5980   di = unsafeIRDirty_0_N( 2/*regparms*/,
5981           hName, VG_(fnptr_to_fnentry)( hFun ),
5982           mkIRExprVec_2( ea, dataB )
5983        );
5984   /* no need to mess with any annotations.  This call accesses
5985      neither guest state nor guest memory. */
5986   if (guard) di->guard = guard;
5987   stmt( 'B', mce, IRStmt_Dirty(di) );
5988}
5989
5990static IRAtom* narrowTo32 ( MCEnv* mce, IRAtom* e ) {
5991   IRType eTy = typeOfIRExpr(mce->sb->tyenv, e);
5992   if (eTy == Ity_I64)
5993      return assignNew( 'B', mce, Ity_I32, unop(Iop_64to32, e) );
5994   if (eTy == Ity_I32)
5995      return e;
5996   tl_assert(0);
5997}
5998
5999static IRAtom* zWidenFrom32 ( MCEnv* mce, IRType dstTy, IRAtom* e ) {
6000   IRType eTy = typeOfIRExpr(mce->sb->tyenv, e);
6001   tl_assert(eTy == Ity_I32);
6002   if (dstTy == Ity_I64)
6003      return assignNew( 'B', mce, Ity_I64, unop(Iop_32Uto64, e) );
6004   tl_assert(0);
6005}
6006
6007
6008static IRAtom* schemeE ( MCEnv* mce, IRExpr* e )
6009{
6010   tl_assert(MC_(clo_mc_level) == 3);
6011
6012   switch (e->tag) {
6013
6014      case Iex_GetI: {
6015         IRRegArray* descr_b;
6016         IRAtom      *t1, *t2, *t3, *t4;
6017         IRRegArray* descr      = e->Iex.GetI.descr;
6018         IRType equivIntTy
6019            = MC_(get_otrack_reg_array_equiv_int_type)(descr);
6020         /* If this array is unshadowable for whatever reason, use the
6021            usual approximation. */
6022         if (equivIntTy == Ity_INVALID)
6023            return mkU32(0);
6024         tl_assert(sizeofIRType(equivIntTy) >= 4);
6025         tl_assert(sizeofIRType(equivIntTy) == sizeofIRType(descr->elemTy));
6026         descr_b = mkIRRegArray( descr->base + 2*mce->layout->total_sizeB,
6027                                 equivIntTy, descr->nElems );
6028         /* Do a shadow indexed get of the same size, giving t1.  Take
6029            the bottom 32 bits of it, giving t2.  Compute into t3 the
6030            origin for the index (almost certainly zero, but there's
6031            no harm in being completely general here, since iropt will
6032            remove any useless code), and fold it in, giving a final
6033            value t4. */
6034         t1 = assignNew( 'B', mce, equivIntTy,
6035                          IRExpr_GetI( descr_b, e->Iex.GetI.ix,
6036                                                e->Iex.GetI.bias ));
6037         t2 = narrowTo32( mce, t1 );
6038         t3 = schemeE( mce, e->Iex.GetI.ix );
6039         t4 = gen_maxU32( mce, t2, t3 );
6040         return t4;
6041      }
6042      case Iex_CCall: {
6043         Int i;
6044         IRAtom*  here;
6045         IRExpr** args = e->Iex.CCall.args;
6046         IRAtom*  curr = mkU32(0);
6047         for (i = 0; args[i]; i++) {
6048            tl_assert(i < 32);
6049            tl_assert(isOriginalAtom(mce, args[i]));
6050            /* Only take notice of this arg if the callee's
6051               mc-exclusion mask does not say it is to be excluded. */
6052            if (e->Iex.CCall.cee->mcx_mask & (1<<i)) {
6053               /* the arg is to be excluded from definedness checking.
6054                  Do nothing. */
6055               if (0) VG_(printf)("excluding %s(%d)\n",
6056                                  e->Iex.CCall.cee->name, i);
6057            } else {
6058               /* calculate the arg's origin, and pessimistically
6059                  merge it in. */
6060               here = schemeE( mce, args[i] );
6061               curr = gen_maxU32( mce, curr, here );
6062            }
6063         }
6064         return curr;
6065      }
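      /* Example of the mcx_mask convention used above (illustrative
         values only): a callee with mcx_mask == 0x5 (binary 101)
         excludes args 0 and 2, so for foo(t1,t2,t3) only schemeE(t2)
         is merged into 'curr'; t1 and t3 contribute nothing. */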
6066      case Iex_Load: {
6067         Int dszB;
6068         dszB = sizeofIRType(e->Iex.Load.ty);
6069         /* assert that the B value for the address is already
6070            available (somewhere) */
6071         tl_assert(isIRAtom(e->Iex.Load.addr));
6072         tl_assert(mce->hWordTy == Ity_I32 || mce->hWordTy == Ity_I64);
6073         return gen_load_b( mce, dszB, e->Iex.Load.addr, 0 );
6074      }
6075      case Iex_Mux0X: {
6076         IRAtom* b1 = schemeE( mce, e->Iex.Mux0X.cond );
6077         IRAtom* b2 = schemeE( mce, e->Iex.Mux0X.expr0 );
6078         IRAtom* b3 = schemeE( mce, e->Iex.Mux0X.exprX );
6079         return gen_maxU32( mce, b1, gen_maxU32( mce, b2, b3 ));
6080      }
6081      case Iex_Qop: {
6082         IRAtom* b1 = schemeE( mce, e->Iex.Qop.details->arg1 );
6083         IRAtom* b2 = schemeE( mce, e->Iex.Qop.details->arg2 );
6084         IRAtom* b3 = schemeE( mce, e->Iex.Qop.details->arg3 );
6085         IRAtom* b4 = schemeE( mce, e->Iex.Qop.details->arg4 );
6086         return gen_maxU32( mce, gen_maxU32( mce, b1, b2 ),
6087                                 gen_maxU32( mce, b3, b4 ) );
6088      }
6089      case Iex_Triop: {
6090         IRAtom* b1 = schemeE( mce, e->Iex.Triop.details->arg1 );
6091         IRAtom* b2 = schemeE( mce, e->Iex.Triop.details->arg2 );
6092         IRAtom* b3 = schemeE( mce, e->Iex.Triop.details->arg3 );
6093         return gen_maxU32( mce, b1, gen_maxU32( mce, b2, b3 ) );
6094      }
6095      case Iex_Binop: {
6096         switch (e->Iex.Binop.op) {
6097            case Iop_CasCmpEQ8:  case Iop_CasCmpNE8:
6098            case Iop_CasCmpEQ16: case Iop_CasCmpNE16:
6099            case Iop_CasCmpEQ32: case Iop_CasCmpNE32:
6100            case Iop_CasCmpEQ64: case Iop_CasCmpNE64:
6101               /* Just say these all produce a defined result,
6102                  regardless of their arguments.  See
6103                  COMMENT_ON_CasCmpEQ in this file. */
6104               return mkU32(0);
6105            default: {
6106               IRAtom* b1 = schemeE( mce, e->Iex.Binop.arg1 );
6107               IRAtom* b2 = schemeE( mce, e->Iex.Binop.arg2 );
6108               return gen_maxU32( mce, b1, b2 );
6109            }
6110         }
6111         tl_assert(0);
6112         /*NOTREACHED*/
6113      }
6114      case Iex_Unop: {
6115         IRAtom* b1 = schemeE( mce, e->Iex.Unop.arg );
6116         return b1;
6117      }
6118      case Iex_Const:
6119         return mkU32(0);
6120      case Iex_RdTmp:
6121         return mkexpr( findShadowTmpB( mce, e->Iex.RdTmp.tmp ));
6122      case Iex_Get: {
6123         Int b_offset = MC_(get_otrack_shadow_offset)(
6124                           e->Iex.Get.offset,
6125                           sizeofIRType(e->Iex.Get.ty)
6126                        );
6127         tl_assert(b_offset >= -1
6128                   && b_offset <= mce->layout->total_sizeB - 4);
6129         if (b_offset >= 0) {
6130            /* FIXME: this isn't an atom! */
6131            return IRExpr_Get( b_offset + 2*mce->layout->total_sizeB,
6132                               Ity_I32 );
6133         }
6134         return mkU32(0);
6135      }
6136      default:
6137         VG_(printf)("mc_translate.c: schemeE: unhandled: ");
6138         ppIRExpr(e);
6139         VG_(tool_panic)("memcheck:schemeE");
6140   }
6141}
6142
6143
6144static void do_origins_Dirty ( MCEnv* mce, IRDirty* d )
6145{
6146   // An adapted version of do_shadow_Dirty, for origin (B-value) tracking
6147   Int       i, k, n, toDo, gSz, gOff;
6148   IRAtom    *here, *curr;
6149   IRTemp    dst;
6150
6151   /* First check the guard. */
6152   curr = schemeE( mce, d->guard );
6153
6154   /* Now round up all inputs and maxU32 over them. */
6155
6156   /* Inputs: unmasked args
6157      Note: arguments are evaluated REGARDLESS of the guard expression */
6158   for (i = 0; d->args[i]; i++) {
6159      if (d->cee->mcx_mask & (1<<i)) {
6160         /* ignore this arg */
6161      } else {
6162         here = schemeE( mce, d->args[i] );
6163         curr = gen_maxU32( mce, curr, here );
6164      }
6165   }
6166
6167   /* Inputs: guest state that we read. */
6168   for (i = 0; i < d->nFxState; i++) {
6169      tl_assert(d->fxState[i].fx != Ifx_None);
6170      if (d->fxState[i].fx == Ifx_Write)
6171         continue;
6172
6173      /* Enumerate the described state segments */
6174      for (k = 0; k < 1 + d->fxState[i].nRepeats; k++) {
6175         gOff = d->fxState[i].offset + k * d->fxState[i].repeatLen;
6176         gSz  = d->fxState[i].size;
6177
6178         /* Ignore any sections marked as 'always defined'. */
6179         if (isAlwaysDefd(mce, gOff, gSz)) {
6180            if (0)
6181            VG_(printf)("memcheck: Dirty gst: ignored off %d, sz %d\n",
6182                        gOff, gSz);
6183            continue;
6184         }
6185
6186         /* This state element is read or modified.  So we need to
6187            consider it.  If larger than 4 bytes, deal with it in
6188            4-byte chunks. */
6189         while (True) {
6190            Int b_offset;
6191            tl_assert(gSz >= 0);
6192            if (gSz == 0) break;
6193            n = gSz <= 4 ? gSz : 4;
6194            /* update 'curr' with maxU32 of the state slice
6195               gOff .. gOff+n-1 */
6196            b_offset = MC_(get_otrack_shadow_offset)(gOff, 4);
6197            if (b_offset != -1) {
6198               /* Observe the guard expression. If it is false use 0, i.e.
6199                  nothing is known about the origin */
6200               IRAtom *cond, *iffalse, *iftrue;
6201
6202               cond = assignNew( 'B', mce, Ity_I8, unop(Iop_1Uto8, d->guard));
6203               iffalse = mkU32(0);
6204               iftrue  = assignNew( 'B', mce, Ity_I32,
6205                                    IRExpr_Get(b_offset
6206                                                 + 2*mce->layout->total_sizeB,
6207                                               Ity_I32));
6208               here = assignNew( 'B', mce, Ity_I32,
6209                                 IRExpr_Mux0X(cond, iffalse, iftrue));
6210               curr = gen_maxU32( mce, curr, here );
6211            }
6212            gSz -= n;
6213            gOff += n;
6214         }
6215      }
6216   }
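   /* Chunking example for the loop above (hypothetical offsets): an
      Ifx_Read entry covering 16 bytes at guest offset 64 is processed
      as four 4-byte slices at offsets 64, 68, 72 and 76.  Each slice
      with a valid B-shadow offset contributes the otag currently held
      in the shadow guest state, guarded by d->guard via the Mux0X, so
      a false guard contributes otag 0. */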
6217
6218   /* Inputs: memory */
6219
6220   if (d->mFx != Ifx_None) {
6221      /* Because we may do multiple shadow loads/stores from the same
6222         base address, it's best to do a single test of its
6223         definedness right now.  Post-instrumentation optimisation
6224         should remove all but this test. */
6225      tl_assert(d->mAddr);
6226      here = schemeE( mce, d->mAddr );
6227      curr = gen_maxU32( mce, curr, here );
6228   }
6229
6230   /* Deal with memory inputs (reads or modifies) */
6231   if (d->mFx == Ifx_Read || d->mFx == Ifx_Modify) {
6232      toDo   = d->mSize;
6233      /* chew off 32-bit chunks.  We don't care about the endianness
6234         since it's all going to be condensed down to a single origin
6235         value by gen_maxU32, but nevertheless choose an endianness
6236         which is hopefully native to the platform. */
6237      while (toDo >= 4) {
6238         here = gen_guarded_load_b( mce, 4, d->mAddr, d->mSize - toDo,
6239                                    d->guard );
6240         curr = gen_maxU32( mce, curr, here );
6241         toDo -= 4;
6242      }
6243      /* handle possible 16-bit excess */
6244      while (toDo >= 2) {
6245         here = gen_guarded_load_b( mce, 2, d->mAddr, d->mSize - toDo,
6246                                    d->guard );
6247         curr = gen_maxU32( mce, curr, here );
6248         toDo -= 2;
6249      }
6250      /* chew off the remaining 8-bit chunk, if any */
6251      if (toDo == 1) {
6252         here = gen_guarded_load_b( mce, 1, d->mAddr, d->mSize - toDo,
6253                                    d->guard );
6254         curr = gen_maxU32( mce, curr, here );
6255         toDo -= 1;
6256      }
6257      tl_assert(toDo == 0);
6258   }
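   /* Example: for d->mSize == 7 the code above issues guarded B-loads
      of 4 bytes at offset 0, 2 bytes at offset 4 and 1 byte at offset
      6, max'ing each result into 'curr'.  Sizes 3, 5 and 6 decompose
      similarly; only a trailing single byte ever reaches the final
      1-byte case. */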
6259
6260   /* Whew!  So curr is a 32-bit B-value which should give an origin
6261      of some use if any of the inputs to the helper are undefined.
6262      Now we need to re-distribute the results to all destinations. */
6263
6264   /* Outputs: the destination temporary, if there is one. */
6265   if (d->tmp != IRTemp_INVALID) {
6266      dst   = findShadowTmpB(mce, d->tmp);
6267      assign( 'B', mce, dst, curr );
6268   }
6269
6270   /* Outputs: guest state that we write or modify. */
6271   for (i = 0; i < d->nFxState; i++) {
6272      tl_assert(d->fxState[i].fx != Ifx_None);
6273      if (d->fxState[i].fx == Ifx_Read)
6274         continue;
6275
6276      /* Enumerate the described state segments */
6277      for (k = 0; k < 1 + d->fxState[i].nRepeats; k++) {
6278         gOff = d->fxState[i].offset + k * d->fxState[i].repeatLen;
6279         gSz  = d->fxState[i].size;
6280
6281         /* Ignore any sections marked as 'always defined'. */
6282         if (isAlwaysDefd(mce, gOff, gSz))
6283            continue;
6284
6285         /* This state element is written or modified.  So we need to
6286            consider it.  If larger than 4 bytes, deal with it in
6287            4-byte chunks. */
6288         while (True) {
6289            Int b_offset;
6290            tl_assert(gSz >= 0);
6291            if (gSz == 0) break;
6292            n = gSz <= 4 ? gSz : 4;
6293            /* Write 'curr' to the state slice gOff .. gOff+n-1 */
6294            b_offset = MC_(get_otrack_shadow_offset)(gOff, 4);
6295            if (b_offset != -1) {
6296               if (d->guard) {
6297                  /* If the guard expression evaluates to false we simply Put
6298                  /* If the guard evaluates to false, simply Put back the
6299                     otag already held in the shadow guest state slot. */
6300
6301                  cond    = assignNew('B', mce, Ity_I8,
6302                                      unop(Iop_1Uto8, d->guard));
6303                  iffalse = assignNew('B', mce, Ity_I32,
6304                                      IRExpr_Get(b_offset +
6305                                                 2*mce->layout->total_sizeB,
6306                                                 Ity_I32));
6307                  curr = assignNew('B', mce, Ity_I32,
6308                                   IRExpr_Mux0X(cond, iffalse, curr));
6309               }
6310               stmt( 'B', mce, IRStmt_Put(b_offset
6311                                             + 2*mce->layout->total_sizeB,
6312                                          curr ));
6313            }
6314            gSz -= n;
6315            gOff += n;
6316         }
6317      }
6318   }
6319
6320   /* Outputs: memory that we write or modify.  Same comments about
6321      endianness as above apply. */
6322   if (d->mFx == Ifx_Write || d->mFx == Ifx_Modify) {
6323      toDo   = d->mSize;
6324      /* chew off 32-bit chunks */
6325      while (toDo >= 4) {
6326         gen_store_b( mce, 4, d->mAddr, d->mSize - toDo, curr,
6327                      d->guard );
6328         toDo -= 4;
6329      }
6330      /* handle possible 16-bit excess */
6331      while (toDo >= 2) {
6332         gen_store_b( mce, 2, d->mAddr, d->mSize - toDo, curr,
6333                      d->guard );
6334         toDo -= 2;
6335      }
6336      /* chew off the remaining 8-bit chunk, if any */
6337      if (toDo == 1) {
6338         gen_store_b( mce, 1, d->mAddr, d->mSize - toDo, curr,
6339                      d->guard );
6340         toDo -= 1;
6341      }
6342      tl_assert(toDo == 0);
6343   }
6344}
6345
6346
6347static void do_origins_Store ( MCEnv* mce,
6348                               IREndness stEnd,
6349                               IRExpr* stAddr,
6350                               IRExpr* stData )
6351{
6352   Int     dszB;
6353   IRAtom* dataB;
6354   /* assert that the B value for the address is already available
6355      (somewhere), since the call to schemeE will want to see it.
6356      XXXX how does this actually ensure that?? */
6357   tl_assert(isIRAtom(stAddr));
6358   tl_assert(isIRAtom(stData));
6359   dszB  = sizeofIRType( typeOfIRExpr(mce->sb->tyenv, stData ) );
6360   dataB = schemeE( mce, stData );
6361   gen_store_b( mce, dszB, stAddr, 0/*offset*/, dataB,
6362                     NULL/*guard*/ );
6363}
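/* Illustrative sketch of do_origins_Store (assumed temporaries): for
   the original statement  STle(t11) = t12:I32  it computes dszB == 4
   and dataB == schemeE(t12), the otag of the stored data, and then
   gen_store_b emits an unguarded call along the lines of
   MC_(helperc_b_store4)(t11, dataB). */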
6364
6365
6366static void schemeS ( MCEnv* mce, IRStmt* st )
6367{
6368   tl_assert(MC_(clo_mc_level) == 3);
6369
6370   switch (st->tag) {
6371
6372      case Ist_AbiHint:
6373         /* The value-check instrumenter handles this - by arranging
6374            to pass the address of the next instruction to
6375            MC_(helperc_MAKE_STACK_UNINIT).  This is all that needs to
6376            happen for origin tracking w.r.t. AbiHints.  So there is
6377            nothing to do here. */
6378         break;
6379
6380      case Ist_PutI: {
6381         IRPutI *puti = st->Ist.PutI.details;
6382         IRRegArray* descr_b;
6383         IRAtom      *t1, *t2, *t3, *t4;
6384         IRRegArray* descr = puti->descr;
6385         IRType equivIntTy
6386            = MC_(get_otrack_reg_array_equiv_int_type)(descr);
6387         /* If this array is unshadowable for whatever reason,
6388            generate no code. */
6389         if (equivIntTy == Ity_INVALID)
6390            break;
6391         tl_assert(sizeofIRType(equivIntTy) >= 4);
6392         tl_assert(sizeofIRType(equivIntTy) == sizeofIRType(descr->elemTy));
6393         descr_b
6394            = mkIRRegArray( descr->base + 2*mce->layout->total_sizeB,
6395                            equivIntTy, descr->nElems );
6396         /* Compute a value to Put: the join (maxU32) of the origin of
6397            the data being Put (obviously) and of the origin of the
6398            index value (not so obviously). */
6399         t1 = schemeE( mce, puti->data );
6400         t2 = schemeE( mce, puti->ix );
6401         t3 = gen_maxU32( mce, t1, t2 );
6402         t4 = zWidenFrom32( mce, equivIntTy, t3 );
6403         stmt( 'B', mce, IRStmt_PutI( mkIRPutI(descr_b, puti->ix,
6404                                               puti->bias, t4) ));
6405         break;
6406      }
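      /* Sketch of the PutI translation above (hypothetical array): for
         a guest register array of I64 elements, the otags of
         puti->data and puti->ix are max'd together (t3), zero-widened
         to I64 (t4), and written to the parallel B-shadow array at
         descr->base + 2*total_sizeB with the same index and bias. */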
6407
6408      case Ist_Dirty:
6409         do_origins_Dirty( mce, st->Ist.Dirty.details );
6410         break;
6411
6412      case Ist_Store:
6413         do_origins_Store( mce, st->Ist.Store.end,
6414                                st->Ist.Store.addr,
6415                                st->Ist.Store.data );
6416         break;
6417
6418      case Ist_LLSC: {
6419         /* In short: treat a load-linked like a normal load followed
6420            by an assignment of the loaded (shadow) data to the result
6421            temporary.  Treat a store-conditional like a normal store,
6422            and mark the result temporary as defined. */
6423         if (st->Ist.LLSC.storedata == NULL) {
6424            /* Load Linked */
6425            IRType resTy
6426               = typeOfIRTemp(mce->sb->tyenv, st->Ist.LLSC.result);
6427            IRExpr* vanillaLoad
6428               = IRExpr_Load(st->Ist.LLSC.end, resTy, st->Ist.LLSC.addr);
6429            tl_assert(resTy == Ity_I64 || resTy == Ity_I32
6430                      || resTy == Ity_I16 || resTy == Ity_I8);
6431            assign( 'B', mce, findShadowTmpB(mce, st->Ist.LLSC.result),
6432                              schemeE(mce, vanillaLoad));
6433         } else {
6434            /* Store conditional */
6435            do_origins_Store( mce, st->Ist.LLSC.end,
6436                                   st->Ist.LLSC.addr,
6437                                   st->Ist.LLSC.storedata );
6438            /* For the rationale behind this, see comments at the
6439               place where the V-shadow for .result is constructed, in
6440               do_shadow_LLSC.  In short, we regard .result as
6441               always-defined. */
6442            assign( 'B', mce, findShadowTmpB(mce, st->Ist.LLSC.result),
6443                              mkU32(0) );
6444         }
6445         break;
6446      }
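      /* Example for the LLSC case above: a load-linked
         t3 = LD-Linked:I32(t1) gives t3's B shadow the value
         schemeE(LD:I32(t1)), i.e. the otag fetched for those 4 bytes
         of memory; for the matching store-conditional the stored
         data's otag is written out via do_origins_Store and the
         success/failure result is treated as defined (otag 0). */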
6447
6448      case Ist_Put: {
6449         Int b_offset
6450            = MC_(get_otrack_shadow_offset)(
6451                 st->Ist.Put.offset,
6452                 sizeofIRType(typeOfIRExpr(mce->sb->tyenv, st->Ist.Put.data))
6453              );
6454         if (b_offset >= 0) {
6455            /* FIXME: this isn't an atom! */
6456            stmt( 'B', mce, IRStmt_Put(b_offset + 2*mce->layout->total_sizeB,
6457                                       schemeE( mce, st->Ist.Put.data )) );
6458         }
6459         break;
6460      }
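      /* Example for the Put case above (assumed offsets): a Put of an
         I64 register at guest offset G maps to
         b_offset = MC_(get_otrack_shadow_offset)(G, 8); if that is
         >= 0, the data's otag is Put at b_offset + 2*total_sizeB in
         the shadow guest state, while untrackable slots
         (b_offset == -1) generate no code. */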
6461
6462      case Ist_WrTmp:
6463         assign( 'B', mce, findShadowTmpB(mce, st->Ist.WrTmp.tmp),
6464                           schemeE(mce, st->Ist.WrTmp.data) );
6465         break;
6466
6467      case Ist_MBE:
6468      case Ist_NoOp:
6469      case Ist_Exit:
6470      case Ist_IMark:
6471         break;
6472
6473      default:
6474         VG_(printf)("mc_translate.c: schemeS: unhandled: ");
6475         ppIRStmt(st);
6476         VG_(tool_panic)("memcheck:schemeS");
6477   }
6478}
6479
6480
6481/*--------------------------------------------------------------------*/
6482/*--- end                                           mc_translate.c ---*/
6483/*--------------------------------------------------------------------*/
6484