mc_translate.c revision 7eb17a827d03c3f7c83f75b6634ea15cbdfede5a
1
2/*--------------------------------------------------------------------*/
3/*--- Instrument IR to perform memory checking operations.         ---*/
4/*---                                               mc_translate.c ---*/
5/*--------------------------------------------------------------------*/
6
7/*
8   This file is part of MemCheck, a heavyweight Valgrind tool for
9   detecting memory errors.
10
11   Copyright (C) 2000-2012 Julian Seward
12      jseward@acm.org
13
14   This program is free software; you can redistribute it and/or
15   modify it under the terms of the GNU General Public License as
16   published by the Free Software Foundation; either version 2 of the
17   License, or (at your option) any later version.
18
19   This program is distributed in the hope that it will be useful, but
20   WITHOUT ANY WARRANTY; without even the implied warranty of
21   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
22   General Public License for more details.
23
24   You should have received a copy of the GNU General Public License
25   along with this program; if not, write to the Free Software
26   Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA
27   02111-1307, USA.
28
29   The GNU General Public License is contained in the file COPYING.
30*/
31
32#include "pub_tool_basics.h"
33#include "pub_tool_poolalloc.h"     // For mc_include.h
34#include "pub_tool_hashtable.h"     // For mc_include.h
35#include "pub_tool_libcassert.h"
36#include "pub_tool_libcprint.h"
37#include "pub_tool_tooliface.h"
38#include "pub_tool_machine.h"     // VG_(fnptr_to_fnentry)
39#include "pub_tool_xarray.h"
40#include "pub_tool_mallocfree.h"
41#include "pub_tool_libcbase.h"
42
43#include "mc_include.h"
44
45
46/* FIXMEs JRS 2011-June-16.
47
48   Check the interpretation for vector narrowing and widening ops,
49   particularly the saturating ones.  I suspect they are overly
50   pessimistic and/or wrong.
51*/
52
53/* This file implements the Memcheck instrumentation, and in
54   particular contains the core of its undefined value detection
55   machinery.  For a comprehensive background of the terminology,
56   algorithms and rationale used herein, read:
57
58     Using Valgrind to detect undefined value errors with
59     bit-precision
60
61     Julian Seward and Nicholas Nethercote
62
63     2005 USENIX Annual Technical Conference (General Track),
64     Anaheim, CA, USA, April 10-15, 2005.
65
66   ----
67
68   Here is as good a place as any to record exactly when V bits are and
69   should be checked, why, and what function is responsible.
70
71
72   Memcheck complains when an undefined value is used:
73
74   1. In the condition of a conditional branch.  Because it could cause
75      incorrect control flow, and thus cause incorrect externally-visible
76      behaviour.  [mc_translate.c:complainIfUndefined]
77
78   2. As an argument to a system call, or as the value that specifies
79      the system call number.  Because it could cause an incorrect
80      externally-visible side effect.  [mc_translate.c:mc_pre_reg_read]
81
82   3. As the address in a load or store.  Because it could cause an
83      incorrect value to be used later, which could cause externally-visible
84      behaviour (eg. via incorrect control flow or an incorrect system call
85      argument)  [complainIfUndefined]
86
87   4. As the target address of a branch.  Because it could cause incorrect
88      control flow.  [complainIfUndefined]
89
90   5. As an argument to setenv, unsetenv, or putenv.  Because it could put
91      an incorrect value into the external environment.
92      [mc_replace_strmem.c:VG_WRAP_FUNCTION_ZU(*, *env)]
93
94   6. As the index in a GETI or PUTI operation.  I'm not sure why... (njn).
95      [complainIfUndefined]
96
97   7. As an argument to the VALGRIND_CHECK_MEM_IS_DEFINED and
98      VALGRIND_CHECK_VALUE_IS_DEFINED client requests.  Because the user
99      requested it.  [in memcheck.h]
100
101
102   Memcheck also complains, but should not, when an undefined value is used:
103
104   8. As the shift value in certain SIMD shift operations (but not in the
105      standard integer shift operations).  This inconsistency is due to
106      historical reasons.  [complainIfUndefined]
107
108
109   Memcheck does not complain, but should, when an undefined value is used:
110
111   9. As an input to a client request.  Because the client request may
112      affect the visible behaviour -- see bug #144362 for an example
113      involving the malloc replacements in vg_replace_malloc.c and
114      VALGRIND_NON_SIMD_CALL* requests, where an uninitialised argument
115      isn't identified.  That bug report also has some info on how to solve
116      the problem.  [valgrind.h:VALGRIND_DO_CLIENT_REQUEST]
117
118
119   In practice, 1 and 2 account for the vast majority of cases.
120*/
121
122/*------------------------------------------------------------*/
123/*--- Forward decls                                        ---*/
124/*------------------------------------------------------------*/
125
126struct _MCEnv;
127
128static IRType  shadowTypeV ( IRType ty );
129static IRExpr* expr2vbits ( struct _MCEnv* mce, IRExpr* e );
130static IRTemp  findShadowTmpB ( struct _MCEnv* mce, IRTemp orig );
131
132static IRExpr *i128_const_zero(void);
133
134/*------------------------------------------------------------*/
135/*--- Memcheck running state, and tmp management.          ---*/
136/*------------------------------------------------------------*/
137
138/* Carries info about a particular tmp.  The tmp's number is not
139   recorded, as this is implied by (equal to) its index in the tmpMap
140   in MCEnv.  The tmp's type is also not recorded, as this is present
141   in MCEnv.sb->tyenv.
142
143   When .kind is Orig, .shadowV and .shadowB may give the identities
144   of the temps currently holding the associated definedness (shadowV)
145   and origin (shadowB) values, or these may be IRTemp_INVALID if code
146   to compute such values has not yet been emitted.
147
148   When .kind is VSh or BSh then the tmp holds a V- or B- value,
149   and so .shadowV and .shadowB must be IRTemp_INVALID, since it is
150   illogical for a shadow tmp itself to be shadowed.
151*/
152typedef
153   enum { Orig=1, VSh=2, BSh=3 }
154   TempKind;
155
156typedef
157   struct {
158      TempKind kind;
159      IRTemp   shadowV;
160      IRTemp   shadowB;
161   }
162   TempMapEnt;
163
164
165/* Carries around state during memcheck instrumentation. */
166typedef
167   struct _MCEnv {
168      /* MODIFIED: the superblock being constructed.  IRStmts are
169         added. */
170      IRSB* sb;
171      Bool  trace;
172
173      /* MODIFIED: a table [0 .. #temps_in_sb-1] which gives the
174         current kind and possibly shadow temps for each temp in the
175         IRSB being constructed.  Note that it does not contain the
176         type of each tmp.  If you want to know the type, look at the
177         relevant entry in sb->tyenv.  It follows that at all times
178         during the instrumentation process, the valid indices for
179         tmpMap and sb->tyenv are identical, being 0 .. N-1 where N is
180         the total number of Orig, V- and B- temps allocated so far.
181
182         The reason for this strange split (types in one place, all
183         other info in another) is that we need the types to be
184         attached to sb so as to make it possible to do
185         "typeOfIRExpr(mce->sb->tyenv, ...)" at various places in the
186         instrumentation process. */
187      XArray* /* of TempMapEnt */ tmpMap;
188
189      /* MODIFIED: indicates whether "bogus" literals have so far been
190         found.  Starts off False, and may change to True. */
191      Bool bogusLiterals;
192
193      /* READONLY: indicates whether we should use expensive
194         interpretations of integer adds, since unfortunately LLVM
195         uses them to do ORs in some circumstances.  Defaulted to True
196         on MacOS and False everywhere else. */
197      Bool useLLVMworkarounds;
198
199      /* READONLY: the guest layout.  This indicates which parts of
200         the guest state should be regarded as 'always defined'. */
201      VexGuestLayout* layout;
202
203      /* READONLY: the host word type.  Needed for constructing
204         arguments of type 'HWord' to be passed to helper functions.
205         Ity_I32 or Ity_I64 only. */
206      IRType hWordTy;
207   }
208   MCEnv;
209
210/* SHADOW TMP MANAGEMENT.  Shadow tmps are allocated lazily (on
211   demand), as they are encountered.  This is for two reasons.
212
213   (1) (less important reason): Many original tmps are unused due to
214   initial IR optimisation, and we do not want to waste space in tables
215   tracking them.
216
217   Shadow IRTemps are therefore allocated on demand.  mce.tmpMap is a
218   table indexed [0 .. n_temps-1], which gives the current shadow for
219   each original tmp, or IRTemp_INVALID if none is so far assigned.
220   It is necessary to support making multiple assignments to a shadow
221   -- specifically, after testing a shadow for definedness, it needs
222   to be made defined.  But IR's SSA property disallows this.
223
224   (2) (more important reason): Therefore, when a shadow needs to get
225   a new value, a new temporary is created, the value is assigned to
226   that, and the tmpMap is updated to reflect the new binding.
227
228   A corollary is that if the tmpMap maps a given tmp to
229   IRTemp_INVALID and we are hoping to read that shadow tmp, it means
230   there's a read-before-write error in the original tmps.  The IR
231   sanity checker should catch all such anomalies, however.
232*/
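/* Illustrative example of the re-binding described above -- this is the
   sequence complainIfUndefined uses later in this file, once a shadow
   has been tested:

      newShadowTmpV(mce, atom->Iex.RdTmp.tmp);
      assign('V', mce, findShadowTmpV(mce, atom->Iex.RdTmp.tmp),
                       definedOfType(ty));

   newShadowTmpV abandons the old binding and allocates a fresh shadow
   tmp, and the assign marks that fresh tmp as fully defined; hence no
   shadow tmp is ever assigned twice and SSA-ness is preserved. */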
233
234/* Create a new IRTemp of type 'ty' and kind 'kind', and add it to
235   both the table in mce->sb and to our auxiliary mapping.  Note that
236   newTemp may cause mce->tmpMap to resize, hence previous results
237   from VG_(indexXA)(mce->tmpMap) are invalidated. */
238static IRTemp newTemp ( MCEnv* mce, IRType ty, TempKind kind )
239{
240   Word       newIx;
241   TempMapEnt ent;
242   IRTemp     tmp = newIRTemp(mce->sb->tyenv, ty);
243   ent.kind    = kind;
244   ent.shadowV = IRTemp_INVALID;
245   ent.shadowB = IRTemp_INVALID;
246   newIx = VG_(addToXA)( mce->tmpMap, &ent );
247   tl_assert(newIx == (Word)tmp);
248   return tmp;
249}
250
251
252/* Find the tmp currently shadowing the given original tmp.  If none
253   so far exists, allocate one.  */
254static IRTemp findShadowTmpV ( MCEnv* mce, IRTemp orig )
255{
256   TempMapEnt* ent;
257   /* VG_(indexXA) range-checks 'orig', hence no need to check
258      here. */
259   ent = (TempMapEnt*)VG_(indexXA)( mce->tmpMap, (Word)orig );
260   tl_assert(ent->kind == Orig);
261   if (ent->shadowV == IRTemp_INVALID) {
262      IRTemp tmpV
263        = newTemp( mce, shadowTypeV(mce->sb->tyenv->types[orig]), VSh );
264      /* newTemp may cause mce->tmpMap to resize, hence previous results
265         from VG_(indexXA) are invalid. */
266      ent = (TempMapEnt*)VG_(indexXA)( mce->tmpMap, (Word)orig );
267      tl_assert(ent->kind == Orig);
268      tl_assert(ent->shadowV == IRTemp_INVALID);
269      ent->shadowV = tmpV;
270   }
271   return ent->shadowV;
272}
273
274/* Allocate a new shadow for the given original tmp.  This means any
275   previous shadow is abandoned.  This is needed because it is
276   necessary to give a new value to a shadow once it has been tested
277   for undefinedness, but unfortunately IR's SSA property disallows
278   this.  Instead we must abandon the old shadow, allocate a new one
279   and use that instead.
280
281   This is the same as findShadowTmpV, except we don't bother to see
282   if a shadow temp already existed -- we simply allocate a new one
283   regardless. */
284static void newShadowTmpV ( MCEnv* mce, IRTemp orig )
285{
286   TempMapEnt* ent;
287   /* VG_(indexXA) range-checks 'orig', hence no need to check
288      here. */
289   ent = (TempMapEnt*)VG_(indexXA)( mce->tmpMap, (Word)orig );
290   tl_assert(ent->kind == Orig);
291   if (1) {
292      IRTemp tmpV
293        = newTemp( mce, shadowTypeV(mce->sb->tyenv->types[orig]), VSh );
294      /* newTemp may cause mce->tmpMap to resize, hence previous results
295         from VG_(indexXA) are invalid. */
296      ent = (TempMapEnt*)VG_(indexXA)( mce->tmpMap, (Word)orig );
297      tl_assert(ent->kind == Orig);
298      ent->shadowV = tmpV;
299   }
300}
301
302
303/*------------------------------------------------------------*/
304/*--- IRAtoms -- a subset of IRExprs                       ---*/
305/*------------------------------------------------------------*/
306
307/* An atom is either an IRExpr_Const or an IRExpr_Tmp, as defined by
308   isIRAtom() in libvex_ir.h.  Because this instrumenter expects flat
309   input, most of this code deals in atoms.  Usefully, a value atom
310   always has a V-value which is also an atom: constants are shadowed
311   by constants, and temps are shadowed by the corresponding shadow
312   temporary. */
313
314typedef  IRExpr  IRAtom;
315
316/* (used for sanity checks only): is this an atom which looks
317   like it's from original code? */
318static Bool isOriginalAtom ( MCEnv* mce, IRAtom* a1 )
319{
320   if (a1->tag == Iex_Const)
321      return True;
322   if (a1->tag == Iex_RdTmp) {
323      TempMapEnt* ent = VG_(indexXA)( mce->tmpMap, a1->Iex.RdTmp.tmp );
324      return ent->kind == Orig;
325   }
326   return False;
327}
328
329/* (used for sanity checks only): is this an atom which looks
330   like it's from shadow code? */
331static Bool isShadowAtom ( MCEnv* mce, IRAtom* a1 )
332{
333   if (a1->tag == Iex_Const)
334      return True;
335   if (a1->tag == Iex_RdTmp) {
336      TempMapEnt* ent = VG_(indexXA)( mce->tmpMap, a1->Iex.RdTmp.tmp );
337      return ent->kind == VSh || ent->kind == BSh;
338   }
339   return False;
340}
341
342/* (used for sanity checks only): check that both args are atoms and
343   are identically-kinded. */
344static Bool sameKindedAtoms ( IRAtom* a1, IRAtom* a2 )
345{
346   if (a1->tag == Iex_RdTmp && a2->tag == Iex_RdTmp)
347      return True;
348   if (a1->tag == Iex_Const && a2->tag == Iex_Const)
349      return True;
350   return False;
351}
352
353
354/*------------------------------------------------------------*/
355/*--- Type management                                      ---*/
356/*------------------------------------------------------------*/
357
358/* Shadow state is always accessed using integer types.  This returns
359   an integer type with the same size (as per sizeofIRType) as the
360   given type.  The only valid shadow types are Bit, I8, I16, I32,
361   I64, I128, V128, V256. */
362
363static IRType shadowTypeV ( IRType ty )
364{
365   switch (ty) {
366      case Ity_I1:
367      case Ity_I8:
368      case Ity_I16:
369      case Ity_I32:
370      case Ity_I64:
371      case Ity_I128: return ty;
372      case Ity_F32:  return Ity_I32;
373      case Ity_D32:  return Ity_I32;
374      case Ity_F64:  return Ity_I64;
375      case Ity_D64:  return Ity_I64;
376      case Ity_F128: return Ity_I128;
377      case Ity_D128: return Ity_I128;
378      case Ity_V128: return Ity_V128;
379      case Ity_V256: return Ity_V256;
380      default: ppIRType(ty);
381               VG_(tool_panic)("memcheck:shadowTypeV");
382   }
383}
384
385/* Produce a 'defined' value of the given shadow type.  Should only be
386   supplied shadow types (Bit/I8/I16/I32/I64/I128/V128). */
387static IRExpr* definedOfType ( IRType ty ) {
388   switch (ty) {
389      case Ity_I1:   return IRExpr_Const(IRConst_U1(False));
390      case Ity_I8:   return IRExpr_Const(IRConst_U8(0));
391      case Ity_I16:  return IRExpr_Const(IRConst_U16(0));
392      case Ity_I32:  return IRExpr_Const(IRConst_U32(0));
393      case Ity_I64:  return IRExpr_Const(IRConst_U64(0));
394      case Ity_I128: return i128_const_zero();
395      case Ity_V128: return IRExpr_Const(IRConst_V128(0x0000));
396      default:       VG_(tool_panic)("memcheck:definedOfType");
397   }
398}
399
400
401/*------------------------------------------------------------*/
402/*--- Constructing IR fragments                            ---*/
403/*------------------------------------------------------------*/
404
405/* add stmt to a bb */
406static inline void stmt ( HChar cat, MCEnv* mce, IRStmt* st ) {
407   if (mce->trace) {
408      VG_(printf)("  %c: ", cat);
409      ppIRStmt(st);
410      VG_(printf)("\n");
411   }
412   addStmtToIRSB(mce->sb, st);
413}
414
415/* assign value to tmp */
416static inline
417void assign ( HChar cat, MCEnv* mce, IRTemp tmp, IRExpr* expr ) {
418   stmt(cat, mce, IRStmt_WrTmp(tmp,expr));
419}
420
421/* build various kinds of expressions */
422#define triop(_op, _arg1, _arg2, _arg3) \
423                                 IRExpr_Triop((_op),(_arg1),(_arg2),(_arg3))
424#define binop(_op, _arg1, _arg2) IRExpr_Binop((_op),(_arg1),(_arg2))
425#define unop(_op, _arg)          IRExpr_Unop((_op),(_arg))
426#define mkU8(_n)                 IRExpr_Const(IRConst_U8(_n))
427#define mkU16(_n)                IRExpr_Const(IRConst_U16(_n))
428#define mkU32(_n)                IRExpr_Const(IRConst_U32(_n))
429#define mkU64(_n)                IRExpr_Const(IRConst_U64(_n))
430#define mkV128(_n)               IRExpr_Const(IRConst_V128(_n))
431#define mkexpr(_tmp)             IRExpr_RdTmp((_tmp))
432
433/* Bind the given expression to a new temporary, and return the
434   temporary.  This effectively converts an arbitrary expression into
435   an atom.
436
437   'ty' is the type of 'e' and hence the type that the new temporary
438   needs to be.  But passing it in is redundant, since we can deduce
439   the type merely by inspecting 'e'.  So at least use that fact to
440   assert that the two types agree. */
441static IRAtom* assignNew ( HChar cat, MCEnv* mce, IRType ty, IRExpr* e )
442{
443   TempKind k;
444   IRTemp   t;
445   IRType   tyE = typeOfIRExpr(mce->sb->tyenv, e);
446
447   tl_assert(tyE == ty); /* so 'ty' is redundant (!) */
448   switch (cat) {
449      case 'V': k = VSh;  break;
450      case 'B': k = BSh;  break;
451      case 'C': k = Orig; break;
452                /* happens when we are making up new "orig"
453                   expressions, for IRCAS handling */
454      default: tl_assert(0);
455   }
456   t = newTemp(mce, ty, k);
457   assign(cat, mce, t, e);
458   return mkexpr(t);
459}
460
461
462/*------------------------------------------------------------*/
463/*--- Helper functions for 128-bit ops                     ---*/
464/*------------------------------------------------------------*/
465
466static IRExpr *i128_const_zero(void)
467{
468   IRAtom* z64 = IRExpr_Const(IRConst_U64(0));
469   return binop(Iop_64HLto128, z64, z64);
470}
471
472/* There are no I128 loads and/or stores [as generated by any
473   current front ends].  So we do not need to worry about that in
474   expr2vbits_Load */
475
476
477/*------------------------------------------------------------*/
478/*--- Constructing definedness primitive ops               ---*/
479/*------------------------------------------------------------*/
480
481/* --------- Defined-if-either-defined --------- */
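/* Note on the convention used below: V bits are 0 = defined and
   1 = undefined (see also the ImproveAND/ImproveOR comments further
   down).  So "defined if either defined" is simply bitwise AND, and
   "undefined if either undefined" (next section) is bitwise OR.  For
   example, DifD8 on shadows 0x0F and 0x55 gives 0x05: a result bit
   remains undefined only where both inputs are undefined. */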
482
483static IRAtom* mkDifD8 ( MCEnv* mce, IRAtom* a1, IRAtom* a2 ) {
484   tl_assert(isShadowAtom(mce,a1));
485   tl_assert(isShadowAtom(mce,a2));
486   return assignNew('V', mce, Ity_I8, binop(Iop_And8, a1, a2));
487}
488
489static IRAtom* mkDifD16 ( MCEnv* mce, IRAtom* a1, IRAtom* a2 ) {
490   tl_assert(isShadowAtom(mce,a1));
491   tl_assert(isShadowAtom(mce,a2));
492   return assignNew('V', mce, Ity_I16, binop(Iop_And16, a1, a2));
493}
494
495static IRAtom* mkDifD32 ( MCEnv* mce, IRAtom* a1, IRAtom* a2 ) {
496   tl_assert(isShadowAtom(mce,a1));
497   tl_assert(isShadowAtom(mce,a2));
498   return assignNew('V', mce, Ity_I32, binop(Iop_And32, a1, a2));
499}
500
501static IRAtom* mkDifD64 ( MCEnv* mce, IRAtom* a1, IRAtom* a2 ) {
502   tl_assert(isShadowAtom(mce,a1));
503   tl_assert(isShadowAtom(mce,a2));
504   return assignNew('V', mce, Ity_I64, binop(Iop_And64, a1, a2));
505}
506
507static IRAtom* mkDifDV128 ( MCEnv* mce, IRAtom* a1, IRAtom* a2 ) {
508   tl_assert(isShadowAtom(mce,a1));
509   tl_assert(isShadowAtom(mce,a2));
510   return assignNew('V', mce, Ity_V128, binop(Iop_AndV128, a1, a2));
511}
512
513static IRAtom* mkDifDV256 ( MCEnv* mce, IRAtom* a1, IRAtom* a2 ) {
514   tl_assert(isShadowAtom(mce,a1));
515   tl_assert(isShadowAtom(mce,a2));
516   return assignNew('V', mce, Ity_V256, binop(Iop_AndV256, a1, a2));
517}
518
519/* --------- Undefined-if-either-undefined --------- */
520
521static IRAtom* mkUifU8 ( MCEnv* mce, IRAtom* a1, IRAtom* a2 ) {
522   tl_assert(isShadowAtom(mce,a1));
523   tl_assert(isShadowAtom(mce,a2));
524   return assignNew('V', mce, Ity_I8, binop(Iop_Or8, a1, a2));
525}
526
527static IRAtom* mkUifU16 ( MCEnv* mce, IRAtom* a1, IRAtom* a2 ) {
528   tl_assert(isShadowAtom(mce,a1));
529   tl_assert(isShadowAtom(mce,a2));
530   return assignNew('V', mce, Ity_I16, binop(Iop_Or16, a1, a2));
531}
532
533static IRAtom* mkUifU32 ( MCEnv* mce, IRAtom* a1, IRAtom* a2 ) {
534   tl_assert(isShadowAtom(mce,a1));
535   tl_assert(isShadowAtom(mce,a2));
536   return assignNew('V', mce, Ity_I32, binop(Iop_Or32, a1, a2));
537}
538
539static IRAtom* mkUifU64 ( MCEnv* mce, IRAtom* a1, IRAtom* a2 ) {
540   tl_assert(isShadowAtom(mce,a1));
541   tl_assert(isShadowAtom(mce,a2));
542   return assignNew('V', mce, Ity_I64, binop(Iop_Or64, a1, a2));
543}
544
545static IRAtom* mkUifU128 ( MCEnv* mce, IRAtom* a1, IRAtom* a2 ) {
546   IRAtom *tmp1, *tmp2, *tmp3, *tmp4, *tmp5, *tmp6;
547   tl_assert(isShadowAtom(mce,a1));
548   tl_assert(isShadowAtom(mce,a2));
549   tmp1 = assignNew('V', mce, Ity_I64, unop(Iop_128to64, a1));
550   tmp2 = assignNew('V', mce, Ity_I64, unop(Iop_128HIto64, a1));
551   tmp3 = assignNew('V', mce, Ity_I64, unop(Iop_128to64, a2));
552   tmp4 = assignNew('V', mce, Ity_I64, unop(Iop_128HIto64, a2));
553   tmp5 = assignNew('V', mce, Ity_I64, binop(Iop_Or64, tmp1, tmp3));
554   tmp6 = assignNew('V', mce, Ity_I64, binop(Iop_Or64, tmp2, tmp4));
555
556   return assignNew('V', mce, Ity_I128, binop(Iop_64HLto128, tmp6, tmp5));
557}
558
559static IRAtom* mkUifUV128 ( MCEnv* mce, IRAtom* a1, IRAtom* a2 ) {
560   tl_assert(isShadowAtom(mce,a1));
561   tl_assert(isShadowAtom(mce,a2));
562   return assignNew('V', mce, Ity_V128, binop(Iop_OrV128, a1, a2));
563}
564
565static IRAtom* mkUifUV256 ( MCEnv* mce, IRAtom* a1, IRAtom* a2 ) {
566   tl_assert(isShadowAtom(mce,a1));
567   tl_assert(isShadowAtom(mce,a2));
568   return assignNew('V', mce, Ity_V256, binop(Iop_OrV256, a1, a2));
569}
570
571static IRAtom* mkUifU ( MCEnv* mce, IRType vty, IRAtom* a1, IRAtom* a2 ) {
572   switch (vty) {
573      case Ity_I8:   return mkUifU8(mce, a1, a2);
574      case Ity_I16:  return mkUifU16(mce, a1, a2);
575      case Ity_I32:  return mkUifU32(mce, a1, a2);
576      case Ity_I64:  return mkUifU64(mce, a1, a2);
577      case Ity_I128: return mkUifU128(mce, a1, a2);
578      case Ity_V128: return mkUifUV128(mce, a1, a2);
579      default:
580         VG_(printf)("\n"); ppIRType(vty); VG_(printf)("\n");
581         VG_(tool_panic)("memcheck:mkUifU");
582   }
583}
584
585/* --------- The Left-family of operations. --------- */
586
587static IRAtom* mkLeft8 ( MCEnv* mce, IRAtom* a1 ) {
588   tl_assert(isShadowAtom(mce,a1));
589   return assignNew('V', mce, Ity_I8, unop(Iop_Left8, a1));
590}
591
592static IRAtom* mkLeft16 ( MCEnv* mce, IRAtom* a1 ) {
593   tl_assert(isShadowAtom(mce,a1));
594   return assignNew('V', mce, Ity_I16, unop(Iop_Left16, a1));
595}
596
597static IRAtom* mkLeft32 ( MCEnv* mce, IRAtom* a1 ) {
598   tl_assert(isShadowAtom(mce,a1));
599   return assignNew('V', mce, Ity_I32, unop(Iop_Left32, a1));
600}
601
602static IRAtom* mkLeft64 ( MCEnv* mce, IRAtom* a1 ) {
603   tl_assert(isShadowAtom(mce,a1));
604   return assignNew('V', mce, Ity_I64, unop(Iop_Left64, a1));
605}
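/* Note: per libvex_ir.h, the Left ops compute Left(x) = x | -x, which
   takes the lowest undefined (1) bit and smears it leftwards over all
   higher positions; e.g. Left8(0x04) = 0xFC.  That is the right shape
   for shadowing add/sub, where a carry out of an undefined bit
   position can disturb every more-significant result bit but none of
   the lower ones. */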
606
607/* --------- 'Improvement' functions for AND/OR. --------- */
608
609/* ImproveAND(data, vbits) = data OR vbits.  Bit positions where data
610   is 0 and defined (vbit 0) give defined (0); all others -> undefined (1).
611*/
612static IRAtom* mkImproveAND8 ( MCEnv* mce, IRAtom* data, IRAtom* vbits )
613{
614   tl_assert(isOriginalAtom(mce, data));
615   tl_assert(isShadowAtom(mce, vbits));
616   tl_assert(sameKindedAtoms(data, vbits));
617   return assignNew('V', mce, Ity_I8, binop(Iop_Or8, data, vbits));
618}
619
620static IRAtom* mkImproveAND16 ( MCEnv* mce, IRAtom* data, IRAtom* vbits )
621{
622   tl_assert(isOriginalAtom(mce, data));
623   tl_assert(isShadowAtom(mce, vbits));
624   tl_assert(sameKindedAtoms(data, vbits));
625   return assignNew('V', mce, Ity_I16, binop(Iop_Or16, data, vbits));
626}
627
628static IRAtom* mkImproveAND32 ( MCEnv* mce, IRAtom* data, IRAtom* vbits )
629{
630   tl_assert(isOriginalAtom(mce, data));
631   tl_assert(isShadowAtom(mce, vbits));
632   tl_assert(sameKindedAtoms(data, vbits));
633   return assignNew('V', mce, Ity_I32, binop(Iop_Or32, data, vbits));
634}
635
636static IRAtom* mkImproveAND64 ( MCEnv* mce, IRAtom* data, IRAtom* vbits )
637{
638   tl_assert(isOriginalAtom(mce, data));
639   tl_assert(isShadowAtom(mce, vbits));
640   tl_assert(sameKindedAtoms(data, vbits));
641   return assignNew('V', mce, Ity_I64, binop(Iop_Or64, data, vbits));
642}
643
644static IRAtom* mkImproveANDV128 ( MCEnv* mce, IRAtom* data, IRAtom* vbits )
645{
646   tl_assert(isOriginalAtom(mce, data));
647   tl_assert(isShadowAtom(mce, vbits));
648   tl_assert(sameKindedAtoms(data, vbits));
649   return assignNew('V', mce, Ity_V128, binop(Iop_OrV128, data, vbits));
650}
651
652static IRAtom* mkImproveANDV256 ( MCEnv* mce, IRAtom* data, IRAtom* vbits )
653{
654   tl_assert(isOriginalAtom(mce, data));
655   tl_assert(isShadowAtom(mce, vbits));
656   tl_assert(sameKindedAtoms(data, vbits));
657   return assignNew('V', mce, Ity_V256, binop(Iop_OrV256, data, vbits));
658}
659
660/* ImproveOR(data, vbits) = ~data OR vbits.  Bit positions where data
661   is 1 and defined (vbit 0) give defined (0); all others -> undefined (1).
662*/
663static IRAtom* mkImproveOR8 ( MCEnv* mce, IRAtom* data, IRAtom* vbits )
664{
665   tl_assert(isOriginalAtom(mce, data));
666   tl_assert(isShadowAtom(mce, vbits));
667   tl_assert(sameKindedAtoms(data, vbits));
668   return assignNew(
669             'V', mce, Ity_I8,
670             binop(Iop_Or8,
671                   assignNew('V', mce, Ity_I8, unop(Iop_Not8, data)),
672                   vbits) );
673}
674
675static IRAtom* mkImproveOR16 ( MCEnv* mce, IRAtom* data, IRAtom* vbits )
676{
677   tl_assert(isOriginalAtom(mce, data));
678   tl_assert(isShadowAtom(mce, vbits));
679   tl_assert(sameKindedAtoms(data, vbits));
680   return assignNew(
681             'V', mce, Ity_I16,
682             binop(Iop_Or16,
683                   assignNew('V', mce, Ity_I16, unop(Iop_Not16, data)),
684                   vbits) );
685}
686
687static IRAtom* mkImproveOR32 ( MCEnv* mce, IRAtom* data, IRAtom* vbits )
688{
689   tl_assert(isOriginalAtom(mce, data));
690   tl_assert(isShadowAtom(mce, vbits));
691   tl_assert(sameKindedAtoms(data, vbits));
692   return assignNew(
693             'V', mce, Ity_I32,
694             binop(Iop_Or32,
695                   assignNew('V', mce, Ity_I32, unop(Iop_Not32, data)),
696                   vbits) );
697}
698
699static IRAtom* mkImproveOR64 ( MCEnv* mce, IRAtom* data, IRAtom* vbits )
700{
701   tl_assert(isOriginalAtom(mce, data));
702   tl_assert(isShadowAtom(mce, vbits));
703   tl_assert(sameKindedAtoms(data, vbits));
704   return assignNew(
705             'V', mce, Ity_I64,
706             binop(Iop_Or64,
707                   assignNew('V', mce, Ity_I64, unop(Iop_Not64, data)),
708                   vbits) );
709}
710
711static IRAtom* mkImproveORV128 ( MCEnv* mce, IRAtom* data, IRAtom* vbits )
712{
713   tl_assert(isOriginalAtom(mce, data));
714   tl_assert(isShadowAtom(mce, vbits));
715   tl_assert(sameKindedAtoms(data, vbits));
716   return assignNew(
717             'V', mce, Ity_V128,
718             binop(Iop_OrV128,
719                   assignNew('V', mce, Ity_V128, unop(Iop_NotV128, data)),
720                   vbits) );
721}
722
723static IRAtom* mkImproveORV256 ( MCEnv* mce, IRAtom* data, IRAtom* vbits )
724{
725   tl_assert(isOriginalAtom(mce, data));
726   tl_assert(isShadowAtom(mce, vbits));
727   tl_assert(sameKindedAtoms(data, vbits));
728   return assignNew(
729             'V', mce, Ity_V256,
730             binop(Iop_OrV256,
731                   assignNew('V', mce, Ity_V256, unop(Iop_NotV256, data)),
732                   vbits) );
733}
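/* Worked example of how these improvement terms get used (this assumes
   the And/Or cases of expr2vbits_Binop, later in this file): for x & y
   the shadow result is roughly

      DifD( UifU(vx, vy),                  -- naive: undef if either is
            DifD( ImproveAND(x, vx),       -- 0 where x is a defined 0
                  ImproveAND(y, vy) ) )    -- 0 where y is a defined 0

   so any bit position in which either operand is a defined zero is
   reported as defined, since the AND result there is certainly 0.  The
   Or case is symmetric, using ImproveOR and defined ones instead. */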
734
735/* --------- Pessimising casts. --------- */
736
737/* The function returns an expression of type DST_TY. If any of the VBITS
738   is undefined (value == 1) the resulting expression has all bits set to
739   1. Otherwise, all bits are 0. */
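/* For example: CmpwNEZ32 maps 0 to 0x00000000 and any non-zero value to
   0xFFFFFFFF.  So a partially-undefined shadow such as 0x0000FF00
   PCasts to all-ones ("treat the whole value as undefined"), while
   0x00000000 stays all-zeroes ("whole value defined").  The widening
   cases below then just replicate that all-zeroes/all-ones word into
   the larger destination type. */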
740
741static IRAtom* mkPCastTo( MCEnv* mce, IRType dst_ty, IRAtom* vbits )
742{
743   IRType  src_ty;
744   IRAtom* tmp1;
745
746   /* Note, dst_ty is a shadow type, not an original type. */
747   tl_assert(isShadowAtom(mce,vbits));
748   src_ty = typeOfIRExpr(mce->sb->tyenv, vbits);
749
750   /* Fast-track some common cases */
751   if (src_ty == Ity_I32 && dst_ty == Ity_I32)
752      return assignNew('V', mce, Ity_I32, unop(Iop_CmpwNEZ32, vbits));
753
754   if (src_ty == Ity_I64 && dst_ty == Ity_I64)
755      return assignNew('V', mce, Ity_I64, unop(Iop_CmpwNEZ64, vbits));
756
757   if (src_ty == Ity_I32 && dst_ty == Ity_I64) {
758      /* PCast the arg, then clone it. */
759      IRAtom* tmp = assignNew('V', mce, Ity_I32, unop(Iop_CmpwNEZ32, vbits));
760      return assignNew('V', mce, Ity_I64, binop(Iop_32HLto64, tmp, tmp));
761   }
762
763   if (src_ty == Ity_I64 && dst_ty == Ity_I32) {
764      /* PCast the arg.  This gives all 0s or all 1s.  Then throw away
765         the top half. */
766      IRAtom* tmp = assignNew('V', mce, Ity_I64, unop(Iop_CmpwNEZ64, vbits));
767      return assignNew('V', mce, Ity_I32, unop(Iop_64to32, tmp));
768   }
769
770   /* Else do it the slow way .. */
771   /* First of all, collapse vbits down to a single bit. */
772   tmp1   = NULL;
773   switch (src_ty) {
774      case Ity_I1:
775         tmp1 = vbits;
776         break;
777      case Ity_I8:
778         tmp1 = assignNew('V', mce, Ity_I1, unop(Iop_CmpNEZ8, vbits));
779         break;
780      case Ity_I16:
781         tmp1 = assignNew('V', mce, Ity_I1, unop(Iop_CmpNEZ16, vbits));
782         break;
783      case Ity_I32:
784         tmp1 = assignNew('V', mce, Ity_I1, unop(Iop_CmpNEZ32, vbits));
785         break;
786      case Ity_I64:
787         tmp1 = assignNew('V', mce, Ity_I1, unop(Iop_CmpNEZ64, vbits));
788         break;
789      case Ity_I128: {
790         /* Gah.  Chop it in half, OR the halves together, and compare
791            that with zero. */
792         IRAtom* tmp2 = assignNew('V', mce, Ity_I64, unop(Iop_128HIto64, vbits));
793         IRAtom* tmp3 = assignNew('V', mce, Ity_I64, unop(Iop_128to64, vbits));
794         IRAtom* tmp4 = assignNew('V', mce, Ity_I64, binop(Iop_Or64, tmp2, tmp3));
795         tmp1         = assignNew('V', mce, Ity_I1,
796                                       unop(Iop_CmpNEZ64, tmp4));
797         break;
798      }
799      default:
800         ppIRType(src_ty);
801         VG_(tool_panic)("mkPCastTo(1)");
802   }
803   tl_assert(tmp1);
804   /* Now widen up to the dst type. */
805   switch (dst_ty) {
806      case Ity_I1:
807         return tmp1;
808      case Ity_I8:
809         return assignNew('V', mce, Ity_I8, unop(Iop_1Sto8, tmp1));
810      case Ity_I16:
811         return assignNew('V', mce, Ity_I16, unop(Iop_1Sto16, tmp1));
812      case Ity_I32:
813         return assignNew('V', mce, Ity_I32, unop(Iop_1Sto32, tmp1));
814      case Ity_I64:
815         return assignNew('V', mce, Ity_I64, unop(Iop_1Sto64, tmp1));
816      case Ity_V128:
817         tmp1 = assignNew('V', mce, Ity_I64,  unop(Iop_1Sto64, tmp1));
818         tmp1 = assignNew('V', mce, Ity_V128, binop(Iop_64HLtoV128, tmp1, tmp1));
819         return tmp1;
820      case Ity_I128:
821         tmp1 = assignNew('V', mce, Ity_I64,  unop(Iop_1Sto64, tmp1));
822         tmp1 = assignNew('V', mce, Ity_I128, binop(Iop_64HLto128, tmp1, tmp1));
823         return tmp1;
824      default:
825         ppIRType(dst_ty);
826         VG_(tool_panic)("mkPCastTo(2)");
827   }
828}
829
830/* --------- Accurate interpretation of CmpEQ/CmpNE. --------- */
831/*
832   Normally, we can do CmpEQ/CmpNE by doing UifU on the arguments, and
833   PCasting to Ity_I1.  However, sometimes it is necessary to be more
834   accurate.  The insight is that the result is defined if two
835   corresponding bits can be found, one from each argument, so that
836   both bits are defined but are different -- that makes EQ say "No"
837   and NE say "Yes".  Hence, we compute an improvement term and DifD
838   it onto the "normal" (UifU) result.
839
840   The result is:
841
842   PCastTo<1> (
843      -- naive version
844      PCastTo<sz>( UifU<sz>(vxx, vyy) )
845
846      `DifD<sz>`
847
848      -- improvement term
849      PCastTo<sz>( PCast<sz>( CmpEQ<sz> ( vec, 1...1 ) ) )
850   )
851
852   where
853     vec contains 0 (defined) bits where the corresponding arg bits
854     are defined but different, and 1 bits otherwise.
855
856     vec = Or<sz>( vxx,   // 0 iff bit defined
857                   vyy,   // 0 iff bit defined
858                   Not<sz>(Xor<sz>( xx, yy )) // 0 iff bits different
859                 )
860
861     If any bit of vec is 0, the result is defined and so the
862     improvement term should produce 0...0, else it should produce
863     1...1.
864
865     Hence require for the improvement term:
866
867        if vec == 1...1 then 1...1 else 0...0
868     ->
869        PCast<sz>( CmpEQ<sz> ( vec, 1...1 ) )
870
871   This was extensively re-analysed and checked on 6 July 05.
872*/
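/* Small numeric illustration of the scheme above: suppose xx = 0b0100
   and yy = 0b0000, with bit 2 defined in both shadows (vxx[2] = vyy[2]
   = 0) and the other bits possibly undefined.  At bit 2, xx^yy = 1, so
   vec[2] = 0.  Hence vec != 1...1, the improvement term PCasts to
   0...0, and the DifD forces the final result to "defined" -- which is
   right, since the operands certainly differ and the CmpEQ/CmpNE
   outcome is fully determined. */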
873static IRAtom* expensiveCmpEQorNE ( MCEnv*  mce,
874                                    IRType  ty,
875                                    IRAtom* vxx, IRAtom* vyy,
876                                    IRAtom* xx,  IRAtom* yy )
877{
878   IRAtom *naive, *vec, *improvement_term;
879   IRAtom *improved, *final_cast, *top;
880   IROp   opDIFD, opUIFU, opXOR, opNOT, opCMP, opOR;
881
882   tl_assert(isShadowAtom(mce,vxx));
883   tl_assert(isShadowAtom(mce,vyy));
884   tl_assert(isOriginalAtom(mce,xx));
885   tl_assert(isOriginalAtom(mce,yy));
886   tl_assert(sameKindedAtoms(vxx,xx));
887   tl_assert(sameKindedAtoms(vyy,yy));
888
889   switch (ty) {
890      case Ity_I32:
891         opOR   = Iop_Or32;
892         opDIFD = Iop_And32;
893         opUIFU = Iop_Or32;
894         opNOT  = Iop_Not32;
895         opXOR  = Iop_Xor32;
896         opCMP  = Iop_CmpEQ32;
897         top    = mkU32(0xFFFFFFFF);
898         break;
899      case Ity_I64:
900         opOR   = Iop_Or64;
901         opDIFD = Iop_And64;
902         opUIFU = Iop_Or64;
903         opNOT  = Iop_Not64;
904         opXOR  = Iop_Xor64;
905         opCMP  = Iop_CmpEQ64;
906         top    = mkU64(0xFFFFFFFFFFFFFFFFULL);
907         break;
908      default:
909         VG_(tool_panic)("expensiveCmpEQorNE");
910   }
911
912   naive
913      = mkPCastTo(mce,ty,
914                  assignNew('V', mce, ty, binop(opUIFU, vxx, vyy)));
915
916   vec
917      = assignNew(
918           'V', mce,ty,
919           binop( opOR,
920                  assignNew('V', mce,ty, binop(opOR, vxx, vyy)),
921                  assignNew(
922                     'V', mce,ty,
923                     unop( opNOT,
924                           assignNew('V', mce,ty, binop(opXOR, xx, yy))))));
925
926   improvement_term
927      = mkPCastTo( mce,ty,
928                   assignNew('V', mce,Ity_I1, binop(opCMP, vec, top)));
929
930   improved
931      = assignNew( 'V', mce,ty, binop(opDIFD, naive, improvement_term) );
932
933   final_cast
934      = mkPCastTo( mce, Ity_I1, improved );
935
936   return final_cast;
937}
938
939
940/* --------- Semi-accurate interpretation of CmpORD. --------- */
941
942/* CmpORD32{S,U} does PowerPC-style 3-way comparisons:
943
944      CmpORD32S(x,y) = 1<<3   if  x <s y
945                     = 1<<2   if  x >s y
946                     = 1<<1   if  x == y
947
948   and similarly the unsigned variant.  The default interpretation is:
949
950      CmpORD32{S,U}#(x,y,x#,y#) = PCast(x# `UifU` y#)
951                                  & (7<<1)
952
953   The "& (7<<1)" reflects the fact that all result bits except 3,2,1
954   are zero and therefore defined (viz, zero).
955
956   Also deal with a special case better:
957
958      CmpORD32S(x,0)
959
960   Here, bit 3 (LT) of the result is a copy of the top bit of x and
961   will be defined even if the rest of x isn't.  In which case we do:
962
963      CmpORD32S#(x,x#,0,{impliedly 0}#)
964         = PCast(x#) & (3<<1)      -- standard interp for GT#,EQ#
965           | (x# >>u 31) << 3      -- LT# = x#[31]
966
967   Analogous handling for CmpORD64{S,U}.
968*/
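/* Illustration of the special case: for CmpORD32S(x, 0) where only x's
   sign bit is defined, the standard rule would PCast x# to all-ones and
   mark all three result bits undefined.  The refined rule keeps bit 3
   (LT) defined, because it depends only on x's sign: (x# >>u 31) << 3
   contributes a defined 0 there, while bits 2 and 1 (GT, EQ) still come
   from PCast(x#) & (3<<1) and remain undefined. */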
969static Bool isZeroU32 ( IRAtom* e )
970{
971   return
972      toBool( e->tag == Iex_Const
973              && e->Iex.Const.con->tag == Ico_U32
974              && e->Iex.Const.con->Ico.U32 == 0 );
975}
976
977static Bool isZeroU64 ( IRAtom* e )
978{
979   return
980      toBool( e->tag == Iex_Const
981              && e->Iex.Const.con->tag == Ico_U64
982              && e->Iex.Const.con->Ico.U64 == 0 );
983}
984
985static IRAtom* doCmpORD ( MCEnv*  mce,
986                          IROp    cmp_op,
987                          IRAtom* xxhash, IRAtom* yyhash,
988                          IRAtom* xx,     IRAtom* yy )
989{
990   Bool   m64    = cmp_op == Iop_CmpORD64S || cmp_op == Iop_CmpORD64U;
991   Bool   syned  = cmp_op == Iop_CmpORD64S || cmp_op == Iop_CmpORD32S;
992   IROp   opOR   = m64 ? Iop_Or64  : Iop_Or32;
993   IROp   opAND  = m64 ? Iop_And64 : Iop_And32;
994   IROp   opSHL  = m64 ? Iop_Shl64 : Iop_Shl32;
995   IROp   opSHR  = m64 ? Iop_Shr64 : Iop_Shr32;
996   IRType ty     = m64 ? Ity_I64   : Ity_I32;
997   Int    width  = m64 ? 64        : 32;
998
999   Bool (*isZero)(IRAtom*) = m64 ? isZeroU64 : isZeroU32;
1000
1001   IRAtom* threeLeft1 = NULL;
1002   IRAtom* sevenLeft1 = NULL;
1003
1004   tl_assert(isShadowAtom(mce,xxhash));
1005   tl_assert(isShadowAtom(mce,yyhash));
1006   tl_assert(isOriginalAtom(mce,xx));
1007   tl_assert(isOriginalAtom(mce,yy));
1008   tl_assert(sameKindedAtoms(xxhash,xx));
1009   tl_assert(sameKindedAtoms(yyhash,yy));
1010   tl_assert(cmp_op == Iop_CmpORD32S || cmp_op == Iop_CmpORD32U
1011             || cmp_op == Iop_CmpORD64S || cmp_op == Iop_CmpORD64U);
1012
1013   if (0) {
1014      ppIROp(cmp_op); VG_(printf)(" ");
1015      ppIRExpr(xx); VG_(printf)(" "); ppIRExpr( yy ); VG_(printf)("\n");
1016   }
1017
1018   if (syned && isZero(yy)) {
1019      /* fancy interpretation */
1020      /* if yy is zero, then it must be fully defined (zero#). */
1021      tl_assert(isZero(yyhash));
1022      threeLeft1 = m64 ? mkU64(3<<1) : mkU32(3<<1);
1023      return
1024         binop(
1025            opOR,
1026            assignNew(
1027               'V', mce,ty,
1028               binop(
1029                  opAND,
1030                  mkPCastTo(mce,ty, xxhash),
1031                  threeLeft1
1032               )),
1033            assignNew(
1034               'V', mce,ty,
1035               binop(
1036                  opSHL,
1037                  assignNew(
1038                     'V', mce,ty,
1039                     binop(opSHR, xxhash, mkU8(width-1))),
1040                  mkU8(3)
1041               ))
1042	 );
1043   } else {
1044      /* standard interpretation */
1045      sevenLeft1 = m64 ? mkU64(7<<1) : mkU32(7<<1);
1046      return
1047         binop(
1048            opAND,
1049            mkPCastTo( mce,ty,
1050                       mkUifU(mce,ty, xxhash,yyhash)),
1051            sevenLeft1
1052         );
1053   }
1054}
1055
1056
1057/*------------------------------------------------------------*/
1058/*--- Emit a test and complaint if something is undefined. ---*/
1059/*------------------------------------------------------------*/
1060
1061static IRAtom* schemeE ( MCEnv* mce, IRExpr* e ); /* fwds */
1062
1063
1064/* Set the annotations on a dirty helper to indicate that the stack
1065   pointer and instruction pointer might be read.  This is the
1066   behaviour of all 'emit-a-complaint' style functions we might
1067   call. */
1068
1069static void setHelperAnns ( MCEnv* mce, IRDirty* di ) {
1070   di->nFxState = 2;
1071   di->fxState[0].fx        = Ifx_Read;
1072   di->fxState[0].offset    = mce->layout->offset_SP;
1073   di->fxState[0].size      = mce->layout->sizeof_SP;
1074   di->fxState[0].nRepeats  = 0;
1075   di->fxState[0].repeatLen = 0;
1076   di->fxState[1].fx        = Ifx_Read;
1077   di->fxState[1].offset    = mce->layout->offset_IP;
1078   di->fxState[1].size      = mce->layout->sizeof_IP;
1079   di->fxState[1].nRepeats  = 0;
1080   di->fxState[1].repeatLen = 0;
1081}
1082
1083
1084/* Check the supplied **original** atom for undefinedness, and emit a
1085   complaint if so.  Once that happens, mark it as defined.  This is
1086   possible because the atom is either a tmp or literal.  If it's a
1087   tmp, it will be shadowed by a tmp, and so we can set the shadow to
1088   be defined.  In fact as mentioned above, we will have to allocate a
1089   new tmp to carry the new 'defined' shadow value, and update the
1090   original->tmp mapping accordingly; we cannot simply assign a new
1091   value to an existing shadow tmp as this breaks SSAness -- resulting
1092   in the post-instrumentation sanity checker spluttering in disapproval.
1093*/
1094static void complainIfUndefined ( MCEnv* mce, IRAtom* atom, IRExpr *guard )
1095{
1096   IRAtom*  vatom;
1097   IRType   ty;
1098   Int      sz;
1099   IRDirty* di;
1100   IRAtom*  cond;
1101   IRAtom*  origin;
1102   void*    fn;
1103   HChar*   nm;
1104   IRExpr** args;
1105   Int      nargs;
1106
1107   // Don't do V bit tests if we're not reporting undefined value errors.
1108   if (MC_(clo_mc_level) == 1)
1109      return;
1110
1111   /* Since the original expression is atomic, there's no duplicated
1112      work generated by making multiple V-expressions for it.  So we
1113      don't really care about the possibility that someone else may
1114      also create a V-interpretation for it. */
1115   tl_assert(isOriginalAtom(mce, atom));
1116   vatom = expr2vbits( mce, atom );
1117   tl_assert(isShadowAtom(mce, vatom));
1118   tl_assert(sameKindedAtoms(atom, vatom));
1119
1120   ty = typeOfIRExpr(mce->sb->tyenv, vatom);
1121
1122   /* sz is only used for constructing the error message */
1123   sz = ty==Ity_I1 ? 0 : sizeofIRType(ty);
1124
1125   cond = mkPCastTo( mce, Ity_I1, vatom );
1126   /* cond will be 0 if all defined, and 1 if any not defined. */
1127
1128   /* Get the origin info for the value we are about to check.  At
1129      least, if we are doing origin tracking.  If not, use a dummy
1130      zero origin. */
1131   if (MC_(clo_mc_level) == 3) {
1132      origin = schemeE( mce, atom );
1133      if (mce->hWordTy == Ity_I64) {
1134         origin = assignNew( 'B', mce, Ity_I64, unop(Iop_32Uto64, origin) );
1135      }
1136   } else {
1137      origin = NULL;
1138   }
1139
1140   fn    = NULL;
1141   nm    = NULL;
1142   args  = NULL;
1143   nargs = -1;
1144
1145   switch (sz) {
1146      case 0:
1147         if (origin) {
1148            fn    = &MC_(helperc_value_check0_fail_w_o);
1149            nm    = "MC_(helperc_value_check0_fail_w_o)";
1150            args  = mkIRExprVec_1(origin);
1151            nargs = 1;
1152         } else {
1153            fn    = &MC_(helperc_value_check0_fail_no_o);
1154            nm    = "MC_(helperc_value_check0_fail_no_o)";
1155            args  = mkIRExprVec_0();
1156            nargs = 0;
1157         }
1158         break;
1159      case 1:
1160         if (origin) {
1161            fn    = &MC_(helperc_value_check1_fail_w_o);
1162            nm    = "MC_(helperc_value_check1_fail_w_o)";
1163            args  = mkIRExprVec_1(origin);
1164            nargs = 1;
1165         } else {
1166            fn    = &MC_(helperc_value_check1_fail_no_o);
1167            nm    = "MC_(helperc_value_check1_fail_no_o)";
1168            args  = mkIRExprVec_0();
1169            nargs = 0;
1170         }
1171         break;
1172      case 4:
1173         if (origin) {
1174            fn    = &MC_(helperc_value_check4_fail_w_o);
1175            nm    = "MC_(helperc_value_check4_fail_w_o)";
1176            args  = mkIRExprVec_1(origin);
1177            nargs = 1;
1178         } else {
1179            fn    = &MC_(helperc_value_check4_fail_no_o);
1180            nm    = "MC_(helperc_value_check4_fail_no_o)";
1181            args  = mkIRExprVec_0();
1182            nargs = 0;
1183         }
1184         break;
1185      case 8:
1186         if (origin) {
1187            fn    = &MC_(helperc_value_check8_fail_w_o);
1188            nm    = "MC_(helperc_value_check8_fail_w_o)";
1189            args  = mkIRExprVec_1(origin);
1190            nargs = 1;
1191         } else {
1192            fn    = &MC_(helperc_value_check8_fail_no_o);
1193            nm    = "MC_(helperc_value_check8_fail_no_o)";
1194            args  = mkIRExprVec_0();
1195            nargs = 0;
1196         }
1197         break;
1198      case 2:
1199      case 16:
1200         if (origin) {
1201            fn    = &MC_(helperc_value_checkN_fail_w_o);
1202            nm    = "MC_(helperc_value_checkN_fail_w_o)";
1203            args  = mkIRExprVec_2( mkIRExpr_HWord( sz ), origin);
1204            nargs = 2;
1205         } else {
1206            fn    = &MC_(helperc_value_checkN_fail_no_o);
1207            nm    = "MC_(helperc_value_checkN_fail_no_o)";
1208            args  = mkIRExprVec_1( mkIRExpr_HWord( sz ) );
1209            nargs = 1;
1210         }
1211         break;
1212      default:
1213         VG_(tool_panic)("unexpected szB");
1214   }
1215
1216   tl_assert(fn);
1217   tl_assert(nm);
1218   tl_assert(args);
1219   tl_assert(nargs >= 0 && nargs <= 2);
1220   tl_assert( (MC_(clo_mc_level) == 3 && origin != NULL)
1221              || (MC_(clo_mc_level) == 2 && origin == NULL) );
1222
1223   di = unsafeIRDirty_0_N( nargs/*regparms*/, nm,
1224                           VG_(fnptr_to_fnentry)( fn ), args );
1225   di->guard = cond;
1226
1227   /* If the complaint is to be issued under a guard condition, AND that
1228      guard condition. */
1229   if (guard) {
1230     IRAtom *g1 = assignNew('V', mce, Ity_I32, unop(Iop_1Uto32, di->guard));
1231     IRAtom *g2 = assignNew('V', mce, Ity_I32, unop(Iop_1Uto32, guard));
1232     IRAtom *e  = assignNew('V', mce, Ity_I32, binop(Iop_And32, g1, g2));
1233
1234     di->guard = assignNew('V', mce, Ity_I1, unop(Iop_32to1, e));
1235   }
1236
1237   setHelperAnns( mce, di );
1238   stmt( 'V', mce, IRStmt_Dirty(di));
1239
1240   /* Set the shadow tmp to be defined.  First, update the
1241      orig->shadow tmp mapping to reflect the fact that this shadow is
1242      getting a new value. */
1243   tl_assert(isIRAtom(vatom));
1244   /* sameKindedAtoms ... */
1245   if (vatom->tag == Iex_RdTmp) {
1246      tl_assert(atom->tag == Iex_RdTmp);
1247      newShadowTmpV(mce, atom->Iex.RdTmp.tmp);
1248      assign('V', mce, findShadowTmpV(mce, atom->Iex.RdTmp.tmp),
1249                       definedOfType(ty));
1250   }
1251}
1252
1253
1254/*------------------------------------------------------------*/
1255/*--- Shadowing PUTs/GETs, and indexed variants thereof    ---*/
1256/*------------------------------------------------------------*/
1257
1258/* Examine the always-defined sections declared in layout to see if
1259   the (offset,size) section is within one.  Note, it is an error to
1260   partially fall into such a region: (offset,size) should either be
1261   completely in such a region or completely not-in such a region.
1262*/
1263static Bool isAlwaysDefd ( MCEnv* mce, Int offset, Int size )
1264{
1265   Int minoffD, maxoffD, i;
1266   Int minoff = offset;
1267   Int maxoff = minoff + size - 1;
1268   tl_assert((minoff & ~0xFFFF) == 0);
1269   tl_assert((maxoff & ~0xFFFF) == 0);
1270
1271   for (i = 0; i < mce->layout->n_alwaysDefd; i++) {
1272      minoffD = mce->layout->alwaysDefd[i].offset;
1273      maxoffD = minoffD + mce->layout->alwaysDefd[i].size - 1;
1274      tl_assert((minoffD & ~0xFFFF) == 0);
1275      tl_assert((maxoffD & ~0xFFFF) == 0);
1276
1277      if (maxoff < minoffD || maxoffD < minoff)
1278         continue; /* no overlap */
1279      if (minoff >= minoffD && maxoff <= maxoffD)
1280         return True; /* completely contained in an always-defd section */
1281
1282      VG_(tool_panic)("memcheck:isAlwaysDefd:partial overlap");
1283   }
1284   return False; /* could not find any containing section */
1285}
1286
1287
1288/* Generate into bb suitable actions to shadow this Put.  If the state
1289   slice is marked 'always defined', do nothing.  Otherwise, write the
1290   supplied V bits to the shadow state.  We can pass in either an
1291   original atom or a V-atom, but not both.  In the former case the
1292   relevant V-bits are then generated from the original.
1293   We assume here that the definedness of GUARD has already been checked.
1294*/
1295static
1296void do_shadow_PUT ( MCEnv* mce,  Int offset,
1297                     IRAtom* atom, IRAtom* vatom, IRExpr *guard )
1298{
1299   IRType ty;
1300
1301   // Don't do shadow PUTs if we're not doing undefined value checking.
1302   // Their absence lets Vex's optimiser remove all the shadow computation
1303   // that they depend on, which includes GETs of the shadow registers.
1304   if (MC_(clo_mc_level) == 1)
1305      return;
1306
1307   if (atom) {
1308      tl_assert(!vatom);
1309      tl_assert(isOriginalAtom(mce, atom));
1310      vatom = expr2vbits( mce, atom );
1311   } else {
1312      tl_assert(vatom);
1313      tl_assert(isShadowAtom(mce, vatom));
1314   }
1315
1316   ty = typeOfIRExpr(mce->sb->tyenv, vatom);
1317   tl_assert(ty != Ity_I1);
1318   tl_assert(ty != Ity_I128);
1319   if (isAlwaysDefd(mce, offset, sizeofIRType(ty))) {
1320      /* later: no ... */
1321      /* emit code to emit a complaint if any of the vbits are 1. */
1322      /* complainIfUndefined(mce, atom); */
1323   } else {
1324      /* Do a plain shadow Put. */
1325      if (guard) {
1326         /* If the guard expression evaluates to false we simply Put the value
1327            that is already stored in the guest state slot */
1328         IRAtom *cond, *iffalse;
1329
1330         cond    = assignNew('V', mce, Ity_I8, unop(Iop_1Uto8, guard));
1331         iffalse = assignNew('V', mce, ty,
1332                             IRExpr_Get(offset + mce->layout->total_sizeB, ty));
1333         vatom   = assignNew('V', mce, ty, IRExpr_Mux0X(cond, iffalse, vatom));
1334      }
1335      stmt( 'V', mce, IRStmt_Put( offset + mce->layout->total_sizeB, vatom ));
1336   }
1337}
1338
1339
1340/* Generate into bb suitable actions to shadow this PutI.  If the state
1341   slice is marked 'always defined', do nothing; otherwise write the
1342   V bits of the data to the shadow state. */
1343static
1344void do_shadow_PUTI ( MCEnv* mce, IRPutI *puti)
1345{
1346   IRAtom* vatom;
1347   IRType  ty, tyS;
1348   Int     arrSize;
1349   IRRegArray* descr = puti->descr;
1350   IRAtom*     ix    = puti->ix;
1351   Int         bias  = puti->bias;
1352   IRAtom*     atom  = puti->data;
1353
1354   // Don't do shadow PUTIs if we're not doing undefined value checking.
1355   // Their absence lets Vex's optimiser remove all the shadow computation
1356   // that they depend on, which includes GETIs of the shadow registers.
1357   if (MC_(clo_mc_level) == 1)
1358      return;
1359
1360   tl_assert(isOriginalAtom(mce,atom));
1361   vatom = expr2vbits( mce, atom );
1362   tl_assert(sameKindedAtoms(atom, vatom));
1363   ty   = descr->elemTy;
1364   tyS  = shadowTypeV(ty);
1365   arrSize = descr->nElems * sizeofIRType(ty);
1366   tl_assert(ty != Ity_I1);
1367   tl_assert(isOriginalAtom(mce,ix));
1368   complainIfUndefined(mce, ix, NULL);
1369   if (isAlwaysDefd(mce, descr->base, arrSize)) {
1370      /* later: no ... */
1371      /* emit code to emit a complaint if any of the vbits are 1. */
1372      /* complainIfUndefined(mce, atom); */
1373   } else {
1374      /* Do a cloned version of the Put that refers to the shadow
1375         area. */
1376      IRRegArray* new_descr
1377         = mkIRRegArray( descr->base + mce->layout->total_sizeB,
1378                         tyS, descr->nElems);
1379      stmt( 'V', mce, IRStmt_PutI( mkIRPutI(new_descr, ix, bias, vatom) ));
1380   }
1381}
1382
1383
1384/* Return an expression which contains the V bits corresponding to the
1385   given GET (passed in in pieces).
1386*/
1387static
1388IRExpr* shadow_GET ( MCEnv* mce, Int offset, IRType ty )
1389{
1390   IRType tyS = shadowTypeV(ty);
1391   tl_assert(ty != Ity_I1);
1392   tl_assert(ty != Ity_I128);
1393   if (isAlwaysDefd(mce, offset, sizeofIRType(ty))) {
1394      /* Always defined, return all zeroes of the relevant type */
1395      return definedOfType(tyS);
1396   } else {
1397      /* return a cloned version of the Get that refers to the shadow
1398         area. */
1399      /* FIXME: this isn't an atom! */
1400      return IRExpr_Get( offset + mce->layout->total_sizeB, tyS );
1401   }
1402}
1403
1404
1405/* Return an expression which contains the V bits corresponding to the
1406   given GETI (passed in in pieces).
1407*/
1408static
1409IRExpr* shadow_GETI ( MCEnv* mce,
1410                      IRRegArray* descr, IRAtom* ix, Int bias )
1411{
1412   IRType ty   = descr->elemTy;
1413   IRType tyS  = shadowTypeV(ty);
1414   Int arrSize = descr->nElems * sizeofIRType(ty);
1415   tl_assert(ty != Ity_I1);
1416   tl_assert(isOriginalAtom(mce,ix));
1417   complainIfUndefined(mce, ix, NULL);
1418   if (isAlwaysDefd(mce, descr->base, arrSize)) {
1419      /* Always defined, return all zeroes of the relevant type */
1420      return definedOfType(tyS);
1421   } else {
1422      /* return a cloned version of the Get that refers to the shadow
1423         area. */
1424      IRRegArray* new_descr
1425         = mkIRRegArray( descr->base + mce->layout->total_sizeB,
1426                         tyS, descr->nElems);
1427      return IRExpr_GetI( new_descr, ix, bias );
1428   }
1429}
1430
1431
1432/*------------------------------------------------------------*/
1433/*--- Generating approximations for unknown operations,    ---*/
1434/*--- using lazy-propagate semantics                       ---*/
1435/*------------------------------------------------------------*/
1436
1437/* Lazy propagation of undefinedness from two values, resulting in the
1438   specified shadow type.
1439*/
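/* Note: "lazy" here means bit-imprecise.  Each argument's shadow is
   collapsed (PCast) to a single defined/undefined verdict and the
   verdicts are UifU'd together, so the result is marked wholly
   undefined if any input bit anywhere is undefined.  That is a safe
   over-approximation for operations -- typically FP and other helper
   idioms -- where exact bit-level propagation is not worthwhile. */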
1440static
1441IRAtom* mkLazy2 ( MCEnv* mce, IRType finalVty, IRAtom* va1, IRAtom* va2 )
1442{
1443   IRAtom* at;
1444   IRType t1 = typeOfIRExpr(mce->sb->tyenv, va1);
1445   IRType t2 = typeOfIRExpr(mce->sb->tyenv, va2);
1446   tl_assert(isShadowAtom(mce,va1));
1447   tl_assert(isShadowAtom(mce,va2));
1448
1449   /* The general case is inefficient because PCast is an expensive
1450      operation.  Here are some special cases which use PCast only
1451      once rather than twice. */
1452
1453   /* I64 x I64 -> I64 */
1454   if (t1 == Ity_I64 && t2 == Ity_I64 && finalVty == Ity_I64) {
1455      if (0) VG_(printf)("mkLazy2: I64 x I64 -> I64\n");
1456      at = mkUifU(mce, Ity_I64, va1, va2);
1457      at = mkPCastTo(mce, Ity_I64, at);
1458      return at;
1459   }
1460
1461   /* I64 x I64 -> I32 */
1462   if (t1 == Ity_I64 && t2 == Ity_I64 && finalVty == Ity_I32) {
1463      if (0) VG_(printf)("mkLazy2: I64 x I64 -> I32\n");
1464      at = mkUifU(mce, Ity_I64, va1, va2);
1465      at = mkPCastTo(mce, Ity_I32, at);
1466      return at;
1467   }
1468
1469   if (0) {
1470      VG_(printf)("mkLazy2 ");
1471      ppIRType(t1);
1472      VG_(printf)("_");
1473      ppIRType(t2);
1474      VG_(printf)("_");
1475      ppIRType(finalVty);
1476      VG_(printf)("\n");
1477   }
1478
1479   /* General case: force everything via 32-bit intermediaries. */
1480   at = mkPCastTo(mce, Ity_I32, va1);
1481   at = mkUifU(mce, Ity_I32, at, mkPCastTo(mce, Ity_I32, va2));
1482   at = mkPCastTo(mce, finalVty, at);
1483   return at;
1484}
1485
1486
1487/* 3-arg version of the above. */
1488static
1489IRAtom* mkLazy3 ( MCEnv* mce, IRType finalVty,
1490                  IRAtom* va1, IRAtom* va2, IRAtom* va3 )
1491{
1492   IRAtom* at;
1493   IRType t1 = typeOfIRExpr(mce->sb->tyenv, va1);
1494   IRType t2 = typeOfIRExpr(mce->sb->tyenv, va2);
1495   IRType t3 = typeOfIRExpr(mce->sb->tyenv, va3);
1496   tl_assert(isShadowAtom(mce,va1));
1497   tl_assert(isShadowAtom(mce,va2));
1498   tl_assert(isShadowAtom(mce,va3));
1499
1500   /* The general case is inefficient because PCast is an expensive
1501      operation.  Here are some special cases which use PCast only
1502      twice rather than three times. */
1503
1504   /* I32 x I64 x I64 -> I64 */
1505   /* Standard FP idiom: rm x FParg1 x FParg2 -> FPresult */
1506   if (t1 == Ity_I32 && t2 == Ity_I64 && t3 == Ity_I64
1507       && finalVty == Ity_I64) {
1508      if (0) VG_(printf)("mkLazy3: I32 x I64 x I64 -> I64\n");
1509      /* Widen 1st arg to I64.  Since 1st arg is typically a rounding
1510         mode indication which is fully defined, this should get
1511         folded out later. */
1512      at = mkPCastTo(mce, Ity_I64, va1);
1513      /* Now fold in 2nd and 3rd args. */
1514      at = mkUifU(mce, Ity_I64, at, va2);
1515      at = mkUifU(mce, Ity_I64, at, va3);
1516      /* and PCast once again. */
1517      at = mkPCastTo(mce, Ity_I64, at);
1518      return at;
1519   }
1520
1521   /* I32 x I64 x I64 -> I32 */
1522   if (t1 == Ity_I32 && t2 == Ity_I64 && t3 == Ity_I64
1523       && finalVty == Ity_I32) {
1524      if (0) VG_(printf)("mkLazy3: I32 x I64 x I64 -> I32\n");
1525      at = mkPCastTo(mce, Ity_I64, va1);
1526      at = mkUifU(mce, Ity_I64, at, va2);
1527      at = mkUifU(mce, Ity_I64, at, va3);
1528      at = mkPCastTo(mce, Ity_I32, at);
1529      return at;
1530   }
1531
1532   /* I32 x I32 x I32 -> I32 */
1533   /* 32-bit FP idiom, as (eg) happens on ARM */
1534   if (t1 == Ity_I32 && t2 == Ity_I32 && t3 == Ity_I32
1535       && finalVty == Ity_I32) {
1536      if (0) VG_(printf)("mkLazy3: I32 x I32 x I32 -> I32\n");
1537      at = va1;
1538      at = mkUifU(mce, Ity_I32, at, va2);
1539      at = mkUifU(mce, Ity_I32, at, va3);
1540      at = mkPCastTo(mce, Ity_I32, at);
1541      return at;
1542   }
1543
1544   /* I32 x I128 x I128 -> I128 */
1545   /* Standard FP idiom: rm x FParg1 x FParg2 -> FPresult */
1546   if (t1 == Ity_I32 && t2 == Ity_I128 && t3 == Ity_I128
1547       && finalVty == Ity_I128) {
1548      if (0) VG_(printf)("mkLazy3: I32 x I128 x I128 -> I128\n");
1549      /* Widen 1st arg to I128.  Since 1st arg is typically a rounding
1550         mode indication which is fully defined, this should get
1551         folded out later. */
1552      at = mkPCastTo(mce, Ity_I128, va1);
1553      /* Now fold in 2nd and 3rd args. */
1554      at = mkUifU(mce, Ity_I128, at, va2);
1555      at = mkUifU(mce, Ity_I128, at, va3);
1556      /* and PCast once again. */
1557      at = mkPCastTo(mce, Ity_I128, at);
1558      return at;
1559   }
1560   if (1) {
1561      VG_(printf)("mkLazy3: ");
1562      ppIRType(t1);
1563      VG_(printf)(" x ");
1564      ppIRType(t2);
1565      VG_(printf)(" x ");
1566      ppIRType(t3);
1567      VG_(printf)(" -> ");
1568      ppIRType(finalVty);
1569      VG_(printf)("\n");
1570   }
1571
1572   tl_assert(0);
1573   /* General case: force everything via 32-bit intermediaries. */
1574   /*
1575   at = mkPCastTo(mce, Ity_I32, va1);
1576   at = mkUifU(mce, Ity_I32, at, mkPCastTo(mce, Ity_I32, va2));
1577   at = mkUifU(mce, Ity_I32, at, mkPCastTo(mce, Ity_I32, va3));
1578   at = mkPCastTo(mce, finalVty, at);
1579   return at;
1580   */
1581}
1582
1583
1584/* 4-arg version of the above. */
1585static
1586IRAtom* mkLazy4 ( MCEnv* mce, IRType finalVty,
1587                  IRAtom* va1, IRAtom* va2, IRAtom* va3, IRAtom* va4 )
1588{
1589   IRAtom* at;
1590   IRType t1 = typeOfIRExpr(mce->sb->tyenv, va1);
1591   IRType t2 = typeOfIRExpr(mce->sb->tyenv, va2);
1592   IRType t3 = typeOfIRExpr(mce->sb->tyenv, va3);
1593   IRType t4 = typeOfIRExpr(mce->sb->tyenv, va4);
1594   tl_assert(isShadowAtom(mce,va1));
1595   tl_assert(isShadowAtom(mce,va2));
1596   tl_assert(isShadowAtom(mce,va3));
1597   tl_assert(isShadowAtom(mce,va4));
1598
1599   /* The general case is inefficient because PCast is an expensive
1600      operation.  Here are some special cases which use fewer PCasts
1601      than the general scheme would. */
1602
1603   /* I32 x I64 x I64 x I64 -> I64 */
1604   /* Standard FP idiom: rm x FParg1 x FParg2 x FParg3 -> FPresult */
1605   if (t1 == Ity_I32 && t2 == Ity_I64 && t3 == Ity_I64 && t4 == Ity_I64
1606       && finalVty == Ity_I64) {
1607      if (0) VG_(printf)("mkLazy4: I32 x I64 x I64 x I64 -> I64\n");
1608      /* Widen 1st arg to I64.  Since 1st arg is typically a rounding
1609         mode indication which is fully defined, this should get
1610         folded out later. */
1611      at = mkPCastTo(mce, Ity_I64, va1);
1612      /* Now fold in 2nd, 3rd, 4th args. */
1613      at = mkUifU(mce, Ity_I64, at, va2);
1614      at = mkUifU(mce, Ity_I64, at, va3);
1615      at = mkUifU(mce, Ity_I64, at, va4);
1616      /* and PCast once again. */
1617      at = mkPCastTo(mce, Ity_I64, at);
1618      return at;
1619   }
1620   /* I32 x I32 x I32 x I32 -> I32 */
1621   /* Standard FP idiom: rm x FParg1 x FParg2 x FParg3 -> FPresult */
1622   if (t1 == Ity_I32 && t2 == Ity_I32 && t3 == Ity_I32 && t4 == Ity_I32
1623       && finalVty == Ity_I32) {
1624      if (0) VG_(printf)("mkLazy4: I32 x I32 x I32 x I32 -> I32\n");
1625      at = va1;
1626      /* Now fold in 2nd, 3rd, 4th args. */
1627      at = mkUifU(mce, Ity_I32, at, va2);
1628      at = mkUifU(mce, Ity_I32, at, va3);
1629      at = mkUifU(mce, Ity_I32, at, va4);
1630      at = mkPCastTo(mce, Ity_I32, at);
1631      return at;
1632   }
1633
1634   if (1) {
1635      VG_(printf)("mkLazy4: ");
1636      ppIRType(t1);
1637      VG_(printf)(" x ");
1638      ppIRType(t2);
1639      VG_(printf)(" x ");
1640      ppIRType(t3);
1641      VG_(printf)(" x ");
1642      ppIRType(t4);
1643      VG_(printf)(" -> ");
1644      ppIRType(finalVty);
1645      VG_(printf)("\n");
1646   }
1647
1648   tl_assert(0);
1649}
1650
1651
1652/* Do the lazy propagation game from a null-terminated vector of
1653   atoms.  These are presumably the arguments to a helper call, so the
1654   IRCallee info is also supplied in order that we can know which
1655   arguments should be ignored (via the .mcx_mask field).
1656*/
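/* For example, with three relevant I64 args and nothing excluded by
   cee->mcx_mask, the result is, roughly,

      PCastTo( finalVtype,
               UifU64( PCast64(arg3#),
               UifU64( PCast64(arg2#),
               UifU64( PCast64(arg1#), <all-defined I64> ))) )

   Any arg whose bit is set in cee->mcx_mask simply contributes
   nothing to the merge. */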
1657static
1658IRAtom* mkLazyN ( MCEnv* mce,
1659                  IRAtom** exprvec, IRType finalVtype, IRCallee* cee )
1660{
1661   Int     i;
1662   IRAtom* here;
1663   IRAtom* curr;
1664   IRType  mergeTy;
1665   Bool    mergeTy64 = True;
1666
1667   /* Decide on the type of the merge intermediary.  If all relevant
1668      args are I64, then it's I64.  In all other circumstances, use
1669      I32. */
1670   for (i = 0; exprvec[i]; i++) {
1671      tl_assert(i < 32);
1672      tl_assert(isOriginalAtom(mce, exprvec[i]));
1673      if (cee->mcx_mask & (1<<i))
1674         continue;
1675      if (typeOfIRExpr(mce->sb->tyenv, exprvec[i]) != Ity_I64)
1676         mergeTy64 = False;
1677   }
1678
1679   mergeTy = mergeTy64  ? Ity_I64  : Ity_I32;
1680   curr    = definedOfType(mergeTy);
1681
1682   for (i = 0; exprvec[i]; i++) {
1683      tl_assert(i < 32);
1684      tl_assert(isOriginalAtom(mce, exprvec[i]));
1685      /* Only take notice of this arg if the callee's mc-exclusion
1686         mask does not say it is to be excluded. */
1687      if (cee->mcx_mask & (1<<i)) {
1688         /* the arg is to be excluded from definedness checking.  Do
1689            nothing. */
1690         if (0) VG_(printf)("excluding %s(%d)\n", cee->name, i);
1691      } else {
1692         /* calculate the arg's definedness, and pessimistically merge
1693            it in. */
1694         here = mkPCastTo( mce, mergeTy, expr2vbits(mce, exprvec[i]) );
1695         curr = mergeTy64
1696                   ? mkUifU64(mce, here, curr)
1697                   : mkUifU32(mce, here, curr);
1698      }
1699   }
1700   return mkPCastTo(mce, finalVtype, curr );
1701}
1702
1703
1704/*------------------------------------------------------------*/
1705/*--- Generating expensive sequences for exact carry-chain ---*/
1706/*--- propagation in add/sub and related operations.       ---*/
1707/*------------------------------------------------------------*/
1708
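/* The idea: from each operand's value and shadow, form the smallest
   and largest values that the undefined bits would allow,

      a_min = aa & ~qaa   (undefined bits forced to 0)
      a_max = aa |  qaa   (undefined bits forced to 1)

   and similarly b_min/b_max.  A result bit is then regarded as
   undefined if it is undefined in either operand, or if it differs
   between the two extreme sums (for add; for sub, between a_min-b_max
   and a_max-b_min).  Roughly, for add:

      result# = (qaa | qbb) | ((a_min + b_min) ^ (a_max + b_max))
*/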
1709static
1710IRAtom* expensiveAddSub ( MCEnv*  mce,
1711                          Bool    add,
1712                          IRType  ty,
1713                          IRAtom* qaa, IRAtom* qbb,
1714                          IRAtom* aa,  IRAtom* bb )
1715{
1716   IRAtom *a_min, *b_min, *a_max, *b_max;
1717   IROp   opAND, opOR, opXOR, opNOT, opADD, opSUB;
1718
1719   tl_assert(isShadowAtom(mce,qaa));
1720   tl_assert(isShadowAtom(mce,qbb));
1721   tl_assert(isOriginalAtom(mce,aa));
1722   tl_assert(isOriginalAtom(mce,bb));
1723   tl_assert(sameKindedAtoms(qaa,aa));
1724   tl_assert(sameKindedAtoms(qbb,bb));
1725
1726   switch (ty) {
1727      case Ity_I32:
1728         opAND = Iop_And32;
1729         opOR  = Iop_Or32;
1730         opXOR = Iop_Xor32;
1731         opNOT = Iop_Not32;
1732         opADD = Iop_Add32;
1733         opSUB = Iop_Sub32;
1734         break;
1735      case Ity_I64:
1736         opAND = Iop_And64;
1737         opOR  = Iop_Or64;
1738         opXOR = Iop_Xor64;
1739         opNOT = Iop_Not64;
1740         opADD = Iop_Add64;
1741         opSUB = Iop_Sub64;
1742         break;
1743      default:
1744         VG_(tool_panic)("expensiveAddSub");
1745   }
1746
1747   // a_min = aa & ~qaa
1748   a_min = assignNew('V', mce,ty,
1749                     binop(opAND, aa,
1750                                  assignNew('V', mce,ty, unop(opNOT, qaa))));
1751
1752   // b_min = bb & ~qbb
1753   b_min = assignNew('V', mce,ty,
1754                     binop(opAND, bb,
1755                                  assignNew('V', mce,ty, unop(opNOT, qbb))));
1756
1757   // a_max = aa | qaa
1758   a_max = assignNew('V', mce,ty, binop(opOR, aa, qaa));
1759
1760   // b_max = bb | qbb
1761   b_max = assignNew('V', mce,ty, binop(opOR, bb, qbb));
1762
1763   if (add) {
1764      // result = (qaa | qbb) | ((a_min + b_min) ^ (a_max + b_max))
1765      return
1766      assignNew('V', mce,ty,
1767         binop( opOR,
1768                assignNew('V', mce,ty, binop(opOR, qaa, qbb)),
1769                assignNew('V', mce,ty,
1770                   binop( opXOR,
1771                          assignNew('V', mce,ty, binop(opADD, a_min, b_min)),
1772                          assignNew('V', mce,ty, binop(opADD, a_max, b_max))
1773                   )
1774                )
1775         )
1776      );
1777   } else {
1778      // result = (qaa | qbb) | ((a_min - b_max) ^ (a_max - b_min))
1779      return
1780      assignNew('V', mce,ty,
1781         binop( opOR,
1782                assignNew('V', mce,ty, binop(opOR, qaa, qbb)),
1783                assignNew('V', mce,ty,
1784                   binop( opXOR,
1785                          assignNew('V', mce,ty, binop(opSUB, a_min, b_max)),
1786                          assignNew('V', mce,ty, binop(opSUB, a_max, b_min))
1787                   )
1788                )
1789         )
1790      );
1791   }
1792
1793}
1794
1795
1796/*------------------------------------------------------------*/
1797/*--- Scalar shifts.                                       ---*/
1798/*------------------------------------------------------------*/
1799
1800/* Produce an interpretation for (aa << bb) (or >>s, >>u).  The basic
1801   idea is to shift the definedness bits by the original shift amount.
1802   This introduces 0s ("defined") in new positions for left shifts and
1803   unsigned right shifts, and copies the top definedness bit for
1804   signed right shifts.  So, conveniently, applying the original shift
1805   operator to the definedness bits for the left arg is exactly the
1806   right thing to do:
1807
1808      (qaa << bb)
1809
1810   However if the shift amount is undefined then the whole result
1811   is undefined.  Hence need:
1812
1813      (qaa << bb) `UifU` PCast(qbb)
1814
1815   If the shift amount bb is a literal then qbb will say 'all defined'
1816   and the UifU and PCast will get folded out by post-instrumentation
1817   optimisation.
1818*/
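/* A small worked example, for an 8-bit left shift by the constant 2:
   if qaa = 0b00000100 (only bit 2 of aa is undefined) and qbb is all
   zeroes (the shift amount is defined), then

      (qaa << 2) `UifU` PCast(qbb)  =  0b00010000 `UifU` 0  =  0b00010000

   so only bit 4 of the result is regarded as undefined, which is
   exactly right. */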
1819static IRAtom* scalarShift ( MCEnv*  mce,
1820                             IRType  ty,
1821                             IROp    original_op,
1822                             IRAtom* qaa, IRAtom* qbb,
1823                             IRAtom* aa,  IRAtom* bb )
1824{
1825   tl_assert(isShadowAtom(mce,qaa));
1826   tl_assert(isShadowAtom(mce,qbb));
1827   tl_assert(isOriginalAtom(mce,aa));
1828   tl_assert(isOriginalAtom(mce,bb));
1829   tl_assert(sameKindedAtoms(qaa,aa));
1830   tl_assert(sameKindedAtoms(qbb,bb));
1831   return
1832      assignNew(
1833         'V', mce, ty,
1834         mkUifU( mce, ty,
1835                 assignNew('V', mce, ty, binop(original_op, qaa, bb)),
1836                 mkPCastTo(mce, ty, qbb)
1837         )
1838   );
1839}
1840
1841
1842/*------------------------------------------------------------*/
1843/*--- Helpers for dealing with vector primops.             ---*/
1844/*------------------------------------------------------------*/
1845
1846/* Vector pessimisation -- pessimise within each lane individually. */
1847
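/* E.g. mkPCast16x8 maps each 16-bit lane of the shadow value to
   0x0000 if the lane is entirely defined (all V bits zero) and to
   0xFFFF otherwise, since CmpNEZ16x8 yields all-ones in any nonzero
   lane.  The other widths and lane counts below behave likewise. */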
1848static IRAtom* mkPCast8x16 ( MCEnv* mce, IRAtom* at )
1849{
1850   return assignNew('V', mce, Ity_V128, unop(Iop_CmpNEZ8x16, at));
1851}
1852
1853static IRAtom* mkPCast16x8 ( MCEnv* mce, IRAtom* at )
1854{
1855   return assignNew('V', mce, Ity_V128, unop(Iop_CmpNEZ16x8, at));
1856}
1857
1858static IRAtom* mkPCast32x4 ( MCEnv* mce, IRAtom* at )
1859{
1860   return assignNew('V', mce, Ity_V128, unop(Iop_CmpNEZ32x4, at));
1861}
1862
1863static IRAtom* mkPCast64x2 ( MCEnv* mce, IRAtom* at )
1864{
1865   return assignNew('V', mce, Ity_V128, unop(Iop_CmpNEZ64x2, at));
1866}
1867
1868static IRAtom* mkPCast64x4 ( MCEnv* mce, IRAtom* at )
1869{
1870   return assignNew('V', mce, Ity_V256, unop(Iop_CmpNEZ64x4, at));
1871}
1872
1873static IRAtom* mkPCast32x8 ( MCEnv* mce, IRAtom* at )
1874{
1875   return assignNew('V', mce, Ity_V256, unop(Iop_CmpNEZ32x8, at));
1876}
1877
1878static IRAtom* mkPCast32x2 ( MCEnv* mce, IRAtom* at )
1879{
1880   return assignNew('V', mce, Ity_I64, unop(Iop_CmpNEZ32x2, at));
1881}
1882
1883static IRAtom* mkPCast16x4 ( MCEnv* mce, IRAtom* at )
1884{
1885   return assignNew('V', mce, Ity_I64, unop(Iop_CmpNEZ16x4, at));
1886}
1887
1888static IRAtom* mkPCast8x8 ( MCEnv* mce, IRAtom* at )
1889{
1890   return assignNew('V', mce, Ity_I64, unop(Iop_CmpNEZ8x8, at));
1891}
1892
1893static IRAtom* mkPCast16x2 ( MCEnv* mce, IRAtom* at )
1894{
1895   return assignNew('V', mce, Ity_I32, unop(Iop_CmpNEZ16x2, at));
1896}
1897
1898static IRAtom* mkPCast8x4 ( MCEnv* mce, IRAtom* at )
1899{
1900   return assignNew('V', mce, Ity_I32, unop(Iop_CmpNEZ8x4, at));
1901}
1902
1903
1904/* Here's a simple scheme capable of handling ops derived from SSE1
1905   code, while only generating ops that can be efficiently
1906   implemented in SSE1. */
1907
1908/* All-lanes versions are straightforward:
1909
1910   binary32Fx4(x,y)   ==> PCast32x4(UifUV128(x#,y#))
1911
1912   unary32Fx4(x)      ==> PCast32x4(x#)
1913
1914   Lowest-lane-only versions are more complex:
1915
1916   binary32F0x4(x,y)  ==> SetV128lo32(
1917                             x#,
1918                             PCast32(V128to32(UifUV128(x#,y#)))
1919                          )
1920
1921   This is perhaps not so obvious.  In particular, it's faster to
1922   do a V128-bit UifU and then take the bottom 32 bits than the more
1923   obvious scheme of taking the bottom 32 bits of each operand
1924   and doing a 32-bit UifU.  This is because UifU is fast, whereas
1925   chopping lanes off vector values is slow.
1926
1927   Finally:
1928
1929   unary32F0x4(x)     ==> SetV128lo32(
1930                             x#,
1931                             PCast32(V128to32(x#))
1932                          )
1933
1934   Where:
1935
1936   PCast32(v#)   = 1Sto32(CmpNE32(v#,0))
1937   PCast32x4(v#) = CmpNEZ32x4(v#)
1938*/
1939
1940static
1941IRAtom* binary32Fx4 ( MCEnv* mce, IRAtom* vatomX, IRAtom* vatomY )
1942{
1943   IRAtom* at;
1944   tl_assert(isShadowAtom(mce, vatomX));
1945   tl_assert(isShadowAtom(mce, vatomY));
1946   at = mkUifUV128(mce, vatomX, vatomY);
1947   at = assignNew('V', mce, Ity_V128, mkPCast32x4(mce, at));
1948   return at;
1949}
1950
1951static
1952IRAtom* unary32Fx4 ( MCEnv* mce, IRAtom* vatomX )
1953{
1954   IRAtom* at;
1955   tl_assert(isShadowAtom(mce, vatomX));
1956   at = assignNew('V', mce, Ity_V128, mkPCast32x4(mce, vatomX));
1957   return at;
1958}
1959
1960static
1961IRAtom* binary32F0x4 ( MCEnv* mce, IRAtom* vatomX, IRAtom* vatomY )
1962{
1963   IRAtom* at;
1964   tl_assert(isShadowAtom(mce, vatomX));
1965   tl_assert(isShadowAtom(mce, vatomY));
1966   at = mkUifUV128(mce, vatomX, vatomY);
1967   at = assignNew('V', mce, Ity_I32, unop(Iop_V128to32, at));
1968   at = mkPCastTo(mce, Ity_I32, at);
1969   at = assignNew('V', mce, Ity_V128, binop(Iop_SetV128lo32, vatomX, at));
1970   return at;
1971}
1972
1973static
1974IRAtom* unary32F0x4 ( MCEnv* mce, IRAtom* vatomX )
1975{
1976   IRAtom* at;
1977   tl_assert(isShadowAtom(mce, vatomX));
1978   at = assignNew('V', mce, Ity_I32, unop(Iop_V128to32, vatomX));
1979   at = mkPCastTo(mce, Ity_I32, at);
1980   at = assignNew('V', mce, Ity_V128, binop(Iop_SetV128lo32, vatomX, at));
1981   return at;
1982}
1983
1984/* --- ... and ... 64Fx2 versions of the same ... --- */
1985
1986static
1987IRAtom* binary64Fx2 ( MCEnv* mce, IRAtom* vatomX, IRAtom* vatomY )
1988{
1989   IRAtom* at;
1990   tl_assert(isShadowAtom(mce, vatomX));
1991   tl_assert(isShadowAtom(mce, vatomY));
1992   at = mkUifUV128(mce, vatomX, vatomY);
1993   at = assignNew('V', mce, Ity_V128, mkPCast64x2(mce, at));
1994   return at;
1995}
1996
1997static
1998IRAtom* unary64Fx2 ( MCEnv* mce, IRAtom* vatomX )
1999{
2000   IRAtom* at;
2001   tl_assert(isShadowAtom(mce, vatomX));
2002   at = assignNew('V', mce, Ity_V128, mkPCast64x2(mce, vatomX));
2003   return at;
2004}
2005
2006static
2007IRAtom* binary64F0x2 ( MCEnv* mce, IRAtom* vatomX, IRAtom* vatomY )
2008{
2009   IRAtom* at;
2010   tl_assert(isShadowAtom(mce, vatomX));
2011   tl_assert(isShadowAtom(mce, vatomY));
2012   at = mkUifUV128(mce, vatomX, vatomY);
2013   at = assignNew('V', mce, Ity_I64, unop(Iop_V128to64, at));
2014   at = mkPCastTo(mce, Ity_I64, at);
2015   at = assignNew('V', mce, Ity_V128, binop(Iop_SetV128lo64, vatomX, at));
2016   return at;
2017}
2018
2019static
2020IRAtom* unary64F0x2 ( MCEnv* mce, IRAtom* vatomX )
2021{
2022   IRAtom* at;
2023   tl_assert(isShadowAtom(mce, vatomX));
2024   at = assignNew('V', mce, Ity_I64, unop(Iop_V128to64, vatomX));
2025   at = mkPCastTo(mce, Ity_I64, at);
2026   at = assignNew('V', mce, Ity_V128, binop(Iop_SetV128lo64, vatomX, at));
2027   return at;
2028}
2029
2030/* --- --- ... and ... 32Fx2 versions of the same --- --- */
2031
2032static
2033IRAtom* binary32Fx2 ( MCEnv* mce, IRAtom* vatomX, IRAtom* vatomY )
2034{
2035   IRAtom* at;
2036   tl_assert(isShadowAtom(mce, vatomX));
2037   tl_assert(isShadowAtom(mce, vatomY));
2038   at = mkUifU64(mce, vatomX, vatomY);
2039   at = assignNew('V', mce, Ity_I64, mkPCast32x2(mce, at));
2040   return at;
2041}
2042
2043static
2044IRAtom* unary32Fx2 ( MCEnv* mce, IRAtom* vatomX )
2045{
2046   IRAtom* at;
2047   tl_assert(isShadowAtom(mce, vatomX));
2048   at = assignNew('V', mce, Ity_I64, mkPCast32x2(mce, vatomX));
2049   return at;
2050}
2051
2052/* --- ... and ... 64Fx4 versions of the same ... --- */
2053
2054static
2055IRAtom* binary64Fx4 ( MCEnv* mce, IRAtom* vatomX, IRAtom* vatomY )
2056{
2057   IRAtom* at;
2058   tl_assert(isShadowAtom(mce, vatomX));
2059   tl_assert(isShadowAtom(mce, vatomY));
2060   at = mkUifUV256(mce, vatomX, vatomY);
2061   at = assignNew('V', mce, Ity_V256, mkPCast64x4(mce, at));
2062   return at;
2063}
2064
2065static
2066IRAtom* unary64Fx4 ( MCEnv* mce, IRAtom* vatomX )
2067{
2068   IRAtom* at;
2069   tl_assert(isShadowAtom(mce, vatomX));
2070   at = assignNew('V', mce, Ity_V256, mkPCast64x4(mce, vatomX));
2071   return at;
2072}
2073
2074/* --- ... and ... 32Fx8 versions of the same ... --- */
2075
2076static
2077IRAtom* binary32Fx8 ( MCEnv* mce, IRAtom* vatomX, IRAtom* vatomY )
2078{
2079   IRAtom* at;
2080   tl_assert(isShadowAtom(mce, vatomX));
2081   tl_assert(isShadowAtom(mce, vatomY));
2082   at = mkUifUV256(mce, vatomX, vatomY);
2083   at = assignNew('V', mce, Ity_V256, mkPCast32x8(mce, at));
2084   return at;
2085}
2086
2087static
2088IRAtom* unary32Fx8 ( MCEnv* mce, IRAtom* vatomX )
2089{
2090   IRAtom* at;
2091   tl_assert(isShadowAtom(mce, vatomX));
2092   at = assignNew('V', mce, Ity_V256, mkPCast32x8(mce, vatomX));
2093   return at;
2094}
2095
2096/* --- --- Vector saturated narrowing --- --- */
2097
2098/* We used to do something very clever here, but on closer inspection
2099   (2011-Jun-15), and in particular bug #279698, it turns out to be
2100   wrong.  Part of the problem came from the fact that for a long
2101   time, the IR primops to do with saturated narrowing were
2102   underspecified and managed to confuse multiple cases which needed
2103   to be separate: the op names had a signedness qualifier, but in
2104   fact the source and destination signednesses needed to be specified
2105   independently, so the op names really need two independent
2106   signedness specifiers.
2107
2108   As of 2011-Jun-15 (ish) the underspecification was sorted out
2109   properly.  The incorrect instrumentation remained, though.  That
2110   has now (2011-Oct-22) been fixed.
2111
2112   What we now do is simple:
2113
2114   Let the original narrowing op be QNarrowBinXtoYxZ, where Z is a
2115   number of lanes, X is the source lane width and signedness, and Y
2116   is the destination lane width and signedness.  In all cases the
2117   destination lane width is half the source lane width, so the names
2118   have a bit of redundancy, but are at least easy to read.
2119
2120   For example, Iop_QNarrowBin32Sto16Ux8 narrows 8 lanes of signed 32s
2121   to unsigned 16s.
2122
2123   Let Vanilla(OP) be a function that takes OP, one of these
2124   saturating narrowing ops, and produces the same "shaped" narrowing
2125   op which is not saturating, but merely dumps the most significant
2126   bits.  "same shape" means that the lane numbers and widths are the
2127   same as with OP.
2128
2129   For example, Vanilla(Iop_QNarrowBin32Sto16Ux8)
2130                  = Iop_NarrowBin32to16x8,
2131   that is, narrow 8 lanes of 32 bits to 8 lanes of 16 bits, by
2132   dumping the top half of each lane.
2133
2134   So, with that in place, the scheme is simple: pessimise each lane
2135   individually and then apply Vanilla(OP) so as
2136   to get the result in the right "shape".  If the original OP is
2137   QNarrowBinXtoYxZ then we produce
2138
2139   Vanilla(OP)( PCast-X-to-X-x-Z(vatom1), PCast-X-to-X-x-Z(vatom2) )
2140
2141   or for the case when OP is unary (Iop_QNarrowUn*)
2142
2143   Vanilla(OP)( PCast-X-to-X-x-Z(vatom) )
2144*/
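/* Concretely, for the example above, the shadow computation for
   Iop_QNarrowBin32Sto16Ux8 applied to (x,y) is, roughly,

      NarrowBin32to16x8( PCast32x4(x#), PCast32x4(y#) )

   as constructed by vectorNarrowBinV128 below. */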
2145static
2146IROp vanillaNarrowingOpOfShape ( IROp qnarrowOp )
2147{
2148   switch (qnarrowOp) {
2149      /* Binary: (128, 128) -> 128 */
2150      case Iop_QNarrowBin16Sto8Ux16:
2151      case Iop_QNarrowBin16Sto8Sx16:
2152      case Iop_QNarrowBin16Uto8Ux16:
2153         return Iop_NarrowBin16to8x16;
2154      case Iop_QNarrowBin32Sto16Ux8:
2155      case Iop_QNarrowBin32Sto16Sx8:
2156      case Iop_QNarrowBin32Uto16Ux8:
2157         return Iop_NarrowBin32to16x8;
2158      /* Binary: (64, 64) -> 64 */
2159      case Iop_QNarrowBin32Sto16Sx4:
2160         return Iop_NarrowBin32to16x4;
2161      case Iop_QNarrowBin16Sto8Ux8:
2162      case Iop_QNarrowBin16Sto8Sx8:
2163         return Iop_NarrowBin16to8x8;
2164      /* Unary: 128 -> 64 */
2165      case Iop_QNarrowUn64Uto32Ux2:
2166      case Iop_QNarrowUn64Sto32Sx2:
2167      case Iop_QNarrowUn64Sto32Ux2:
2168         return Iop_NarrowUn64to32x2;
2169      case Iop_QNarrowUn32Uto16Ux4:
2170      case Iop_QNarrowUn32Sto16Sx4:
2171      case Iop_QNarrowUn32Sto16Ux4:
2172         return Iop_NarrowUn32to16x4;
2173      case Iop_QNarrowUn16Uto8Ux8:
2174      case Iop_QNarrowUn16Sto8Sx8:
2175      case Iop_QNarrowUn16Sto8Ux8:
2176         return Iop_NarrowUn16to8x8;
2177      default:
2178         ppIROp(qnarrowOp);
2179         VG_(tool_panic)("vanillaNarrowOpOfShape");
2180   }
2181}
2182
2183static
2184IRAtom* vectorNarrowBinV128 ( MCEnv* mce, IROp narrow_op,
2185                              IRAtom* vatom1, IRAtom* vatom2)
2186{
2187   IRAtom *at1, *at2, *at3;
2188   IRAtom* (*pcast)( MCEnv*, IRAtom* );
2189   switch (narrow_op) {
2190      case Iop_QNarrowBin32Sto16Sx8: pcast = mkPCast32x4; break;
2191      case Iop_QNarrowBin32Uto16Ux8: pcast = mkPCast32x4; break;
2192      case Iop_QNarrowBin32Sto16Ux8: pcast = mkPCast32x4; break;
2193      case Iop_QNarrowBin16Sto8Sx16: pcast = mkPCast16x8; break;
2194      case Iop_QNarrowBin16Uto8Ux16: pcast = mkPCast16x8; break;
2195      case Iop_QNarrowBin16Sto8Ux16: pcast = mkPCast16x8; break;
2196      default: VG_(tool_panic)("vectorNarrowBinV128");
2197   }
2198   IROp vanilla_narrow = vanillaNarrowingOpOfShape(narrow_op);
2199   tl_assert(isShadowAtom(mce,vatom1));
2200   tl_assert(isShadowAtom(mce,vatom2));
2201   at1 = assignNew('V', mce, Ity_V128, pcast(mce, vatom1));
2202   at2 = assignNew('V', mce, Ity_V128, pcast(mce, vatom2));
2203   at3 = assignNew('V', mce, Ity_V128, binop(vanilla_narrow, at1, at2));
2204   return at3;
2205}
2206
2207static
2208IRAtom* vectorNarrowBin64 ( MCEnv* mce, IROp narrow_op,
2209                            IRAtom* vatom1, IRAtom* vatom2)
2210{
2211   IRAtom *at1, *at2, *at3;
2212   IRAtom* (*pcast)( MCEnv*, IRAtom* );
2213   switch (narrow_op) {
2214      case Iop_QNarrowBin32Sto16Sx4: pcast = mkPCast32x2; break;
2215      case Iop_QNarrowBin16Sto8Sx8:  pcast = mkPCast16x4; break;
2216      case Iop_QNarrowBin16Sto8Ux8:  pcast = mkPCast16x4; break;
2217      default: VG_(tool_panic)("vectorNarrowBin64");
2218   }
2219   IROp vanilla_narrow = vanillaNarrowingOpOfShape(narrow_op);
2220   tl_assert(isShadowAtom(mce,vatom1));
2221   tl_assert(isShadowAtom(mce,vatom2));
2222   at1 = assignNew('V', mce, Ity_I64, pcast(mce, vatom1));
2223   at2 = assignNew('V', mce, Ity_I64, pcast(mce, vatom2));
2224   at3 = assignNew('V', mce, Ity_I64, binop(vanilla_narrow, at1, at2));
2225   return at3;
2226}
2227
2228static
2229IRAtom* vectorNarrowUnV128 ( MCEnv* mce, IROp narrow_op,
2230                             IRAtom* vatom1)
2231{
2232   IRAtom *at1, *at2;
2233   IRAtom* (*pcast)( MCEnv*, IRAtom* );
2234   tl_assert(isShadowAtom(mce,vatom1));
2235   /* For vanilla narrowing (non-saturating), we can just apply
2236      the op directly to the V bits. */
2237   switch (narrow_op) {
2238      case Iop_NarrowUn16to8x8:
2239      case Iop_NarrowUn32to16x4:
2240      case Iop_NarrowUn64to32x2:
2241         at1 = assignNew('V', mce, Ity_I64, unop(narrow_op, vatom1));
2242         return at1;
2243      default:
2244         break; /* Do Plan B */
2245   }
2246   /* Plan B: for ops that involve a saturation operation on the args,
2247      we must PCast before the vanilla narrow. */
2248   switch (narrow_op) {
2249      case Iop_QNarrowUn16Sto8Sx8:  pcast = mkPCast16x8; break;
2250      case Iop_QNarrowUn16Sto8Ux8:  pcast = mkPCast16x8; break;
2251      case Iop_QNarrowUn16Uto8Ux8:  pcast = mkPCast16x8; break;
2252      case Iop_QNarrowUn32Sto16Sx4: pcast = mkPCast32x4; break;
2253      case Iop_QNarrowUn32Sto16Ux4: pcast = mkPCast32x4; break;
2254      case Iop_QNarrowUn32Uto16Ux4: pcast = mkPCast32x4; break;
2255      case Iop_QNarrowUn64Sto32Sx2: pcast = mkPCast64x2; break;
2256      case Iop_QNarrowUn64Sto32Ux2: pcast = mkPCast64x2; break;
2257      case Iop_QNarrowUn64Uto32Ux2: pcast = mkPCast64x2; break;
2258      default: VG_(tool_panic)("vectorNarrowUnV128");
2259   }
2260   IROp vanilla_narrow = vanillaNarrowingOpOfShape(narrow_op);
2261   at1 = assignNew('V', mce, Ity_V128, pcast(mce, vatom1));
2262   at2 = assignNew('V', mce, Ity_I64, unop(vanilla_narrow, at1));
2263   return at2;
2264}
2265
2266static
2267IRAtom* vectorWidenI64 ( MCEnv* mce, IROp longen_op,
2268                         IRAtom* vatom1)
2269{
2270   IRAtom *at1, *at2;
2271   IRAtom* (*pcast)( MCEnv*, IRAtom* );
2272   switch (longen_op) {
2273      case Iop_Widen8Uto16x8:  pcast = mkPCast16x8; break;
2274      case Iop_Widen8Sto16x8:  pcast = mkPCast16x8; break;
2275      case Iop_Widen16Uto32x4: pcast = mkPCast32x4; break;
2276      case Iop_Widen16Sto32x4: pcast = mkPCast32x4; break;
2277      case Iop_Widen32Uto64x2: pcast = mkPCast64x2; break;
2278      case Iop_Widen32Sto64x2: pcast = mkPCast64x2; break;
2279      default: VG_(tool_panic)("vectorWidenI64");
2280   }
2281   tl_assert(isShadowAtom(mce,vatom1));
2282   at1 = assignNew('V', mce, Ity_V128, unop(longen_op, vatom1));
2283   at2 = assignNew('V', mce, Ity_V128, pcast(mce, at1));
2284   return at2;
2285}
2286
2287
2288/* --- --- Vector integer arithmetic --- --- */
2289
2290/* Simple ... UifU the args and per-lane pessimise the results. */
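/* E.g., roughly:  binary16Ix8(x#,y#)  ==>  PCast16x8(UifUV128(x#,y#)) */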
2291
2292/* --- V128-bit versions --- */
2293
2294static
2295IRAtom* binary8Ix16 ( MCEnv* mce, IRAtom* vatom1, IRAtom* vatom2 )
2296{
2297   IRAtom* at;
2298   at = mkUifUV128(mce, vatom1, vatom2);
2299   at = mkPCast8x16(mce, at);
2300   return at;
2301}
2302
2303static
2304IRAtom* binary16Ix8 ( MCEnv* mce, IRAtom* vatom1, IRAtom* vatom2 )
2305{
2306   IRAtom* at;
2307   at = mkUifUV128(mce, vatom1, vatom2);
2308   at = mkPCast16x8(mce, at);
2309   return at;
2310}
2311
2312static
2313IRAtom* binary32Ix4 ( MCEnv* mce, IRAtom* vatom1, IRAtom* vatom2 )
2314{
2315   IRAtom* at;
2316   at = mkUifUV128(mce, vatom1, vatom2);
2317   at = mkPCast32x4(mce, at);
2318   return at;
2319}
2320
2321static
2322IRAtom* binary64Ix2 ( MCEnv* mce, IRAtom* vatom1, IRAtom* vatom2 )
2323{
2324   IRAtom* at;
2325   at = mkUifUV128(mce, vatom1, vatom2);
2326   at = mkPCast64x2(mce, at);
2327   return at;
2328}
2329
2330/* --- 64-bit versions --- */
2331
2332static
2333IRAtom* binary8Ix8 ( MCEnv* mce, IRAtom* vatom1, IRAtom* vatom2 )
2334{
2335   IRAtom* at;
2336   at = mkUifU64(mce, vatom1, vatom2);
2337   at = mkPCast8x8(mce, at);
2338   return at;
2339}
2340
2341static
2342IRAtom* binary16Ix4 ( MCEnv* mce, IRAtom* vatom1, IRAtom* vatom2 )
2343{
2344   IRAtom* at;
2345   at = mkUifU64(mce, vatom1, vatom2);
2346   at = mkPCast16x4(mce, at);
2347   return at;
2348}
2349
2350static
2351IRAtom* binary32Ix2 ( MCEnv* mce, IRAtom* vatom1, IRAtom* vatom2 )
2352{
2353   IRAtom* at;
2354   at = mkUifU64(mce, vatom1, vatom2);
2355   at = mkPCast32x2(mce, at);
2356   return at;
2357}
2358
2359static
2360IRAtom* binary64Ix1 ( MCEnv* mce, IRAtom* vatom1, IRAtom* vatom2 )
2361{
2362   IRAtom* at;
2363   at = mkUifU64(mce, vatom1, vatom2);
2364   at = mkPCastTo(mce, Ity_I64, at);
2365   return at;
2366}
2367
2368/* --- 32-bit versions --- */
2369
2370static
2371IRAtom* binary8Ix4 ( MCEnv* mce, IRAtom* vatom1, IRAtom* vatom2 )
2372{
2373   IRAtom* at;
2374   at = mkUifU32(mce, vatom1, vatom2);
2375   at = mkPCast8x4(mce, at);
2376   return at;
2377}
2378
2379static
2380IRAtom* binary16Ix2 ( MCEnv* mce, IRAtom* vatom1, IRAtom* vatom2 )
2381{
2382   IRAtom* at;
2383   at = mkUifU32(mce, vatom1, vatom2);
2384   at = mkPCast16x2(mce, at);
2385   return at;
2386}
2387
2388
2389/*------------------------------------------------------------*/
2390/*--- Generate shadow values from all kinds of IRExprs.    ---*/
2391/*------------------------------------------------------------*/
2392
2393static
2394IRAtom* expr2vbits_Qop ( MCEnv* mce,
2395                         IROp op,
2396                         IRAtom* atom1, IRAtom* atom2,
2397                         IRAtom* atom3, IRAtom* atom4 )
2398{
2399   IRAtom* vatom1 = expr2vbits( mce, atom1 );
2400   IRAtom* vatom2 = expr2vbits( mce, atom2 );
2401   IRAtom* vatom3 = expr2vbits( mce, atom3 );
2402   IRAtom* vatom4 = expr2vbits( mce, atom4 );
2403
2404   tl_assert(isOriginalAtom(mce,atom1));
2405   tl_assert(isOriginalAtom(mce,atom2));
2406   tl_assert(isOriginalAtom(mce,atom3));
2407   tl_assert(isOriginalAtom(mce,atom4));
2408   tl_assert(isShadowAtom(mce,vatom1));
2409   tl_assert(isShadowAtom(mce,vatom2));
2410   tl_assert(isShadowAtom(mce,vatom3));
2411   tl_assert(isShadowAtom(mce,vatom4));
2412   tl_assert(sameKindedAtoms(atom1,vatom1));
2413   tl_assert(sameKindedAtoms(atom2,vatom2));
2414   tl_assert(sameKindedAtoms(atom3,vatom3));
2415   tl_assert(sameKindedAtoms(atom4,vatom4));
2416   switch (op) {
2417      case Iop_MAddF64:
2418      case Iop_MAddF64r32:
2419      case Iop_MSubF64:
2420      case Iop_MSubF64r32:
2421         /* I32(rm) x F64 x F64 x F64 -> F64 */
2422         return mkLazy4(mce, Ity_I64, vatom1, vatom2, vatom3, vatom4);
2423
2424      case Iop_MAddF32:
2425      case Iop_MSubF32:
2426         /* I32(rm) x F32 x F32 x F32 -> F32 */
2427         return mkLazy4(mce, Ity_I32, vatom1, vatom2, vatom3, vatom4);
2428
2429      /* V256-bit data-steering */
2430      case Iop_64x4toV256:
2431         return assignNew('V', mce, Ity_V256,
2432                          IRExpr_Qop(op, vatom1, vatom2, vatom3, vatom4));
2433
2434      default:
2435         ppIROp(op);
2436         VG_(tool_panic)("memcheck:expr2vbits_Qop");
2437   }
2438}
2439
2440
2441static
2442IRAtom* expr2vbits_Triop ( MCEnv* mce,
2443                           IROp op,
2444                           IRAtom* atom1, IRAtom* atom2, IRAtom* atom3 )
2445{
2446   IRAtom* vatom1 = expr2vbits( mce, atom1 );
2447   IRAtom* vatom2 = expr2vbits( mce, atom2 );
2448   IRAtom* vatom3 = expr2vbits( mce, atom3 );
2449
2450   tl_assert(isOriginalAtom(mce,atom1));
2451   tl_assert(isOriginalAtom(mce,atom2));
2452   tl_assert(isOriginalAtom(mce,atom3));
2453   tl_assert(isShadowAtom(mce,vatom1));
2454   tl_assert(isShadowAtom(mce,vatom2));
2455   tl_assert(isShadowAtom(mce,vatom3));
2456   tl_assert(sameKindedAtoms(atom1,vatom1));
2457   tl_assert(sameKindedAtoms(atom2,vatom2));
2458   tl_assert(sameKindedAtoms(atom3,vatom3));
2459   switch (op) {
2460      case Iop_AddF128:
2461      case Iop_AddD128:
2462      case Iop_SubF128:
2463      case Iop_SubD128:
2464      case Iop_MulF128:
2465      case Iop_MulD128:
2466      case Iop_DivF128:
2467      case Iop_DivD128:
2468      case Iop_QuantizeD128:
2469         /* I32(rm) x F128/D128 x F128/D128 -> F128/D128 */
2470         return mkLazy3(mce, Ity_I128, vatom1, vatom2, vatom3);
2471      case Iop_AddF64:
2472      case Iop_AddD64:
2473      case Iop_AddF64r32:
2474      case Iop_SubF64:
2475      case Iop_SubD64:
2476      case Iop_SubF64r32:
2477      case Iop_MulF64:
2478      case Iop_MulD64:
2479      case Iop_MulF64r32:
2480      case Iop_DivF64:
2481      case Iop_DivD64:
2482      case Iop_DivF64r32:
2483      case Iop_ScaleF64:
2484      case Iop_Yl2xF64:
2485      case Iop_Yl2xp1F64:
2486      case Iop_AtanF64:
2487      case Iop_PRemF64:
2488      case Iop_PRem1F64:
2489      case Iop_QuantizeD64:
2490         /* I32(rm) x F64/D64 x F64/D64 -> F64/D64 */
2491         return mkLazy3(mce, Ity_I64, vatom1, vatom2, vatom3);
2492      case Iop_PRemC3210F64:
2493      case Iop_PRem1C3210F64:
2494         /* I32(rm) x F64 x F64 -> I32 */
2495         return mkLazy3(mce, Ity_I32, vatom1, vatom2, vatom3);
2496      case Iop_AddF32:
2497      case Iop_SubF32:
2498      case Iop_MulF32:
2499      case Iop_DivF32:
2500         /* I32(rm) x F32 x F32 -> F32 */
2501         return mkLazy3(mce, Ity_I32, vatom1, vatom2, vatom3);
2502      case Iop_SignificanceRoundD64:
2503         /* IRRoundingModeDFP(I32) x I8 x D64 -> D64 */
2504         return mkLazy3(mce, Ity_I64, vatom1, vatom2, vatom3);
2505      case Iop_SignificanceRoundD128:
2506         /* IRRoundingModeDFP(I32) x I8 x D128 -> D128 */
2507         return mkLazy3(mce, Ity_I128, vatom1, vatom2, vatom3);
2508      case Iop_ExtractV128:
2509         complainIfUndefined(mce, atom3, NULL);
2510         return assignNew('V', mce, Ity_V128, triop(op, vatom1, vatom2, atom3));
2511      case Iop_Extract64:
2512         complainIfUndefined(mce, atom3, NULL);
2513         return assignNew('V', mce, Ity_I64, triop(op, vatom1, vatom2, atom3));
2514      case Iop_SetElem8x8:
2515      case Iop_SetElem16x4:
2516      case Iop_SetElem32x2:
2517         complainIfUndefined(mce, atom2, NULL);
2518         return assignNew('V', mce, Ity_I64, triop(op, vatom1, atom2, vatom3));
2519      default:
2520         ppIROp(op);
2521         VG_(tool_panic)("memcheck:expr2vbits_Triop");
2522   }
2523}
2524
2525
2526static
2527IRAtom* expr2vbits_Binop ( MCEnv* mce,
2528                           IROp op,
2529                           IRAtom* atom1, IRAtom* atom2 )
2530{
2531   IRType  and_or_ty;
2532   IRAtom* (*uifu)    (MCEnv*, IRAtom*, IRAtom*);
2533   IRAtom* (*difd)    (MCEnv*, IRAtom*, IRAtom*);
2534   IRAtom* (*improve) (MCEnv*, IRAtom*, IRAtom*);
2535
2536   IRAtom* vatom1 = expr2vbits( mce, atom1 );
2537   IRAtom* vatom2 = expr2vbits( mce, atom2 );
2538
2539   tl_assert(isOriginalAtom(mce,atom1));
2540   tl_assert(isOriginalAtom(mce,atom2));
2541   tl_assert(isShadowAtom(mce,vatom1));
2542   tl_assert(isShadowAtom(mce,vatom2));
2543   tl_assert(sameKindedAtoms(atom1,vatom1));
2544   tl_assert(sameKindedAtoms(atom2,vatom2));
2545   switch (op) {
2546
2547      /* 32-bit SIMD */
2548
2549      case Iop_Add16x2:
2550      case Iop_HAdd16Ux2:
2551      case Iop_HAdd16Sx2:
2552      case Iop_Sub16x2:
2553      case Iop_HSub16Ux2:
2554      case Iop_HSub16Sx2:
2555      case Iop_QAdd16Sx2:
2556      case Iop_QSub16Sx2:
2557         return binary16Ix2(mce, vatom1, vatom2);
2558
2559      case Iop_Add8x4:
2560      case Iop_HAdd8Ux4:
2561      case Iop_HAdd8Sx4:
2562      case Iop_Sub8x4:
2563      case Iop_HSub8Ux4:
2564      case Iop_HSub8Sx4:
2565      case Iop_QSub8Ux4:
2566      case Iop_QAdd8Ux4:
2567      case Iop_QSub8Sx4:
2568      case Iop_QAdd8Sx4:
2569         return binary8Ix4(mce, vatom1, vatom2);
2570
2571      /* 64-bit SIMD */
2572
2573      case Iop_ShrN8x8:
2574      case Iop_ShrN16x4:
2575      case Iop_ShrN32x2:
2576      case Iop_SarN8x8:
2577      case Iop_SarN16x4:
2578      case Iop_SarN32x2:
2579      case Iop_ShlN16x4:
2580      case Iop_ShlN32x2:
2581      case Iop_ShlN8x8:
2582         /* Same scheme as with all other shifts. */
2583         complainIfUndefined(mce, atom2, NULL);
2584         return assignNew('V', mce, Ity_I64, binop(op, vatom1, atom2));
2585
2586      case Iop_QNarrowBin32Sto16Sx4:
2587      case Iop_QNarrowBin16Sto8Sx8:
2588      case Iop_QNarrowBin16Sto8Ux8:
2589         return vectorNarrowBin64(mce, op, vatom1, vatom2);
2590
2591      case Iop_Min8Ux8:
2592      case Iop_Min8Sx8:
2593      case Iop_Max8Ux8:
2594      case Iop_Max8Sx8:
2595      case Iop_Avg8Ux8:
2596      case Iop_QSub8Sx8:
2597      case Iop_QSub8Ux8:
2598      case Iop_Sub8x8:
2599      case Iop_CmpGT8Sx8:
2600      case Iop_CmpGT8Ux8:
2601      case Iop_CmpEQ8x8:
2602      case Iop_QAdd8Sx8:
2603      case Iop_QAdd8Ux8:
2604      case Iop_QSal8x8:
2605      case Iop_QShl8x8:
2606      case Iop_Add8x8:
2607      case Iop_Mul8x8:
2608      case Iop_PolynomialMul8x8:
2609         return binary8Ix8(mce, vatom1, vatom2);
2610
2611      case Iop_Min16Sx4:
2612      case Iop_Min16Ux4:
2613      case Iop_Max16Sx4:
2614      case Iop_Max16Ux4:
2615      case Iop_Avg16Ux4:
2616      case Iop_QSub16Ux4:
2617      case Iop_QSub16Sx4:
2618      case Iop_Sub16x4:
2619      case Iop_Mul16x4:
2620      case Iop_MulHi16Sx4:
2621      case Iop_MulHi16Ux4:
2622      case Iop_CmpGT16Sx4:
2623      case Iop_CmpGT16Ux4:
2624      case Iop_CmpEQ16x4:
2625      case Iop_QAdd16Sx4:
2626      case Iop_QAdd16Ux4:
2627      case Iop_QSal16x4:
2628      case Iop_QShl16x4:
2629      case Iop_Add16x4:
2630      case Iop_QDMulHi16Sx4:
2631      case Iop_QRDMulHi16Sx4:
2632         return binary16Ix4(mce, vatom1, vatom2);
2633
2634      case Iop_Sub32x2:
2635      case Iop_Mul32x2:
2636      case Iop_Max32Sx2:
2637      case Iop_Max32Ux2:
2638      case Iop_Min32Sx2:
2639      case Iop_Min32Ux2:
2640      case Iop_CmpGT32Sx2:
2641      case Iop_CmpGT32Ux2:
2642      case Iop_CmpEQ32x2:
2643      case Iop_Add32x2:
2644      case Iop_QAdd32Ux2:
2645      case Iop_QAdd32Sx2:
2646      case Iop_QSub32Ux2:
2647      case Iop_QSub32Sx2:
2648      case Iop_QSal32x2:
2649      case Iop_QShl32x2:
2650      case Iop_QDMulHi32Sx2:
2651      case Iop_QRDMulHi32Sx2:
2652         return binary32Ix2(mce, vatom1, vatom2);
2653
2654      case Iop_QSub64Ux1:
2655      case Iop_QSub64Sx1:
2656      case Iop_QAdd64Ux1:
2657      case Iop_QAdd64Sx1:
2658      case Iop_QSal64x1:
2659      case Iop_QShl64x1:
2660      case Iop_Sal64x1:
2661         return binary64Ix1(mce, vatom1, vatom2);
2662
2663      case Iop_QShlN8Sx8:
2664      case Iop_QShlN8x8:
2665      case Iop_QSalN8x8:
2666         complainIfUndefined(mce, atom2, NULL);
2667         return mkPCast8x8(mce, vatom1);
2668
2669      case Iop_QShlN16Sx4:
2670      case Iop_QShlN16x4:
2671      case Iop_QSalN16x4:
2672         complainIfUndefined(mce, atom2, NULL);
2673         return mkPCast16x4(mce, vatom1);
2674
2675      case Iop_QShlN32Sx2:
2676      case Iop_QShlN32x2:
2677      case Iop_QSalN32x2:
2678         complainIfUndefined(mce, atom2, NULL);
2679         return mkPCast32x2(mce, vatom1);
2680
2681      case Iop_QShlN64Sx1:
2682      case Iop_QShlN64x1:
2683      case Iop_QSalN64x1:
2684         complainIfUndefined(mce, atom2, NULL);
2685         return mkPCast32x2(mce, vatom1);
2686
2687      case Iop_PwMax32Sx2:
2688      case Iop_PwMax32Ux2:
2689      case Iop_PwMin32Sx2:
2690      case Iop_PwMin32Ux2:
2691      case Iop_PwMax32Fx2:
2692      case Iop_PwMin32Fx2:
2693         return assignNew('V', mce, Ity_I64,
2694                          binop(Iop_PwMax32Ux2,
2695                                mkPCast32x2(mce, vatom1),
2696                                mkPCast32x2(mce, vatom2)));
2697
2698      case Iop_PwMax16Sx4:
2699      case Iop_PwMax16Ux4:
2700      case Iop_PwMin16Sx4:
2701      case Iop_PwMin16Ux4:
2702         return assignNew('V', mce, Ity_I64,
2703                          binop(Iop_PwMax16Ux4,
2704                                mkPCast16x4(mce, vatom1),
2705                                mkPCast16x4(mce, vatom2)));
2706
2707      case Iop_PwMax8Sx8:
2708      case Iop_PwMax8Ux8:
2709      case Iop_PwMin8Sx8:
2710      case Iop_PwMin8Ux8:
2711         return assignNew('V', mce, Ity_I64,
2712                          binop(Iop_PwMax8Ux8,
2713                                mkPCast8x8(mce, vatom1),
2714                                mkPCast8x8(mce, vatom2)));
2715
2716      case Iop_PwAdd32x2:
2717      case Iop_PwAdd32Fx2:
2718         return mkPCast32x2(mce,
2719               assignNew('V', mce, Ity_I64,
2720                         binop(Iop_PwAdd32x2,
2721                               mkPCast32x2(mce, vatom1),
2722                               mkPCast32x2(mce, vatom2))));
2723
2724      case Iop_PwAdd16x4:
2725         return mkPCast16x4(mce,
2726               assignNew('V', mce, Ity_I64,
2727                         binop(op, mkPCast16x4(mce, vatom1),
2728                                   mkPCast16x4(mce, vatom2))));
2729
2730      case Iop_PwAdd8x8:
2731         return mkPCast8x8(mce,
2732               assignNew('V', mce, Ity_I64,
2733                         binop(op, mkPCast8x8(mce, vatom1),
2734                                   mkPCast8x8(mce, vatom2))));
2735
2736      case Iop_Shl8x8:
2737      case Iop_Shr8x8:
2738      case Iop_Sar8x8:
2739      case Iop_Sal8x8:
2740         return mkUifU64(mce,
2741                   assignNew('V', mce, Ity_I64, binop(op, vatom1, atom2)),
2742                   mkPCast8x8(mce,vatom2)
2743                );
2744
2745      case Iop_Shl16x4:
2746      case Iop_Shr16x4:
2747      case Iop_Sar16x4:
2748      case Iop_Sal16x4:
2749         return mkUifU64(mce,
2750                   assignNew('V', mce, Ity_I64, binop(op, vatom1, atom2)),
2751                   mkPCast16x4(mce,vatom2)
2752                );
2753
2754      case Iop_Shl32x2:
2755      case Iop_Shr32x2:
2756      case Iop_Sar32x2:
2757      case Iop_Sal32x2:
2758         return mkUifU64(mce,
2759                   assignNew('V', mce, Ity_I64, binop(op, vatom1, atom2)),
2760                   mkPCast32x2(mce,vatom2)
2761                );
2762
2763      /* 64-bit data-steering */
2764      case Iop_InterleaveLO32x2:
2765      case Iop_InterleaveLO16x4:
2766      case Iop_InterleaveLO8x8:
2767      case Iop_InterleaveHI32x2:
2768      case Iop_InterleaveHI16x4:
2769      case Iop_InterleaveHI8x8:
2770      case Iop_CatOddLanes8x8:
2771      case Iop_CatEvenLanes8x8:
2772      case Iop_CatOddLanes16x4:
2773      case Iop_CatEvenLanes16x4:
2774      case Iop_InterleaveOddLanes8x8:
2775      case Iop_InterleaveEvenLanes8x8:
2776      case Iop_InterleaveOddLanes16x4:
2777      case Iop_InterleaveEvenLanes16x4:
2778         return assignNew('V', mce, Ity_I64, binop(op, vatom1, vatom2));
2779
2780      case Iop_GetElem8x8:
2781         complainIfUndefined(mce, atom2, NULL);
2782         return assignNew('V', mce, Ity_I8, binop(op, vatom1, atom2));
2783      case Iop_GetElem16x4:
2784         complainIfUndefined(mce, atom2, NULL);
2785         return assignNew('V', mce, Ity_I16, binop(op, vatom1, atom2));
2786      case Iop_GetElem32x2:
2787         complainIfUndefined(mce, atom2, NULL);
2788         return assignNew('V', mce, Ity_I32, binop(op, vatom1, atom2));
2789
2790      /* Perm8x8: rearrange values in left arg using steering values
2791        from right arg.  So rearrange the vbits in the same way but
2792        pessimise wrt steering values. */
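      /* E.g., roughly:
            shadow(Perm8x8(x,y)) ==> UifU64( Perm8x8(x#, y), PCast8x8(y#) ) */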
2793      case Iop_Perm8x8:
2794         return mkUifU64(
2795                   mce,
2796                   assignNew('V', mce, Ity_I64, binop(op, vatom1, atom2)),
2797                   mkPCast8x8(mce, vatom2)
2798                );
2799
2800      /* V128-bit SIMD */
2801
2802      case Iop_ShrN8x16:
2803      case Iop_ShrN16x8:
2804      case Iop_ShrN32x4:
2805      case Iop_ShrN64x2:
2806      case Iop_SarN8x16:
2807      case Iop_SarN16x8:
2808      case Iop_SarN32x4:
2809      case Iop_SarN64x2:
2810      case Iop_ShlN8x16:
2811      case Iop_ShlN16x8:
2812      case Iop_ShlN32x4:
2813      case Iop_ShlN64x2:
2814         /* Same scheme as with all other shifts.  Note: 22 Oct 05:
2815            this is wrong now; scalar shifts are done properly lazily.
2816            Vector shifts should be fixed too. */
2817         complainIfUndefined(mce, atom2, NULL);
2818         return assignNew('V', mce, Ity_V128, binop(op, vatom1, atom2));
2819
2820      /* V x V shifts/rotates are done using the standard lazy scheme. */
2821      case Iop_Shl8x16:
2822      case Iop_Shr8x16:
2823      case Iop_Sar8x16:
2824      case Iop_Sal8x16:
2825      case Iop_Rol8x16:
2826         return mkUifUV128(mce,
2827                   assignNew('V', mce, Ity_V128, binop(op, vatom1, atom2)),
2828                   mkPCast8x16(mce,vatom2)
2829                );
2830
2831      case Iop_Shl16x8:
2832      case Iop_Shr16x8:
2833      case Iop_Sar16x8:
2834      case Iop_Sal16x8:
2835      case Iop_Rol16x8:
2836         return mkUifUV128(mce,
2837                   assignNew('V', mce, Ity_V128, binop(op, vatom1, atom2)),
2838                   mkPCast16x8(mce,vatom2)
2839                );
2840
2841      case Iop_Shl32x4:
2842      case Iop_Shr32x4:
2843      case Iop_Sar32x4:
2844      case Iop_Sal32x4:
2845      case Iop_Rol32x4:
2846         return mkUifUV128(mce,
2847                   assignNew('V', mce, Ity_V128, binop(op, vatom1, atom2)),
2848                   mkPCast32x4(mce,vatom2)
2849                );
2850
2851      case Iop_Shl64x2:
2852      case Iop_Shr64x2:
2853      case Iop_Sar64x2:
2854      case Iop_Sal64x2:
2855         return mkUifUV128(mce,
2856                   assignNew('V', mce, Ity_V128, binop(op, vatom1, atom2)),
2857                   mkPCast64x2(mce,vatom2)
2858                );
2859
2860      case Iop_F32ToFixed32Ux4_RZ:
2861      case Iop_F32ToFixed32Sx4_RZ:
2862      case Iop_Fixed32UToF32x4_RN:
2863      case Iop_Fixed32SToF32x4_RN:
2864         complainIfUndefined(mce, atom2, NULL);
2865         return mkPCast32x4(mce, vatom1);
2866
2867      case Iop_F32ToFixed32Ux2_RZ:
2868      case Iop_F32ToFixed32Sx2_RZ:
2869      case Iop_Fixed32UToF32x2_RN:
2870      case Iop_Fixed32SToF32x2_RN:
2871         complainIfUndefined(mce, atom2, NULL);
2872         return mkPCast32x2(mce, vatom1);
2873
2874      case Iop_QSub8Ux16:
2875      case Iop_QSub8Sx16:
2876      case Iop_Sub8x16:
2877      case Iop_Min8Ux16:
2878      case Iop_Min8Sx16:
2879      case Iop_Max8Ux16:
2880      case Iop_Max8Sx16:
2881      case Iop_CmpGT8Sx16:
2882      case Iop_CmpGT8Ux16:
2883      case Iop_CmpEQ8x16:
2884      case Iop_Avg8Ux16:
2885      case Iop_Avg8Sx16:
2886      case Iop_QAdd8Ux16:
2887      case Iop_QAdd8Sx16:
2888      case Iop_QSal8x16:
2889      case Iop_QShl8x16:
2890      case Iop_Add8x16:
2891      case Iop_Mul8x16:
2892      case Iop_PolynomialMul8x16:
2893         return binary8Ix16(mce, vatom1, vatom2);
2894
2895      case Iop_QSub16Ux8:
2896      case Iop_QSub16Sx8:
2897      case Iop_Sub16x8:
2898      case Iop_Mul16x8:
2899      case Iop_MulHi16Sx8:
2900      case Iop_MulHi16Ux8:
2901      case Iop_Min16Sx8:
2902      case Iop_Min16Ux8:
2903      case Iop_Max16Sx8:
2904      case Iop_Max16Ux8:
2905      case Iop_CmpGT16Sx8:
2906      case Iop_CmpGT16Ux8:
2907      case Iop_CmpEQ16x8:
2908      case Iop_Avg16Ux8:
2909      case Iop_Avg16Sx8:
2910      case Iop_QAdd16Ux8:
2911      case Iop_QAdd16Sx8:
2912      case Iop_QSal16x8:
2913      case Iop_QShl16x8:
2914      case Iop_Add16x8:
2915      case Iop_QDMulHi16Sx8:
2916      case Iop_QRDMulHi16Sx8:
2917         return binary16Ix8(mce, vatom1, vatom2);
2918
2919      case Iop_Sub32x4:
2920      case Iop_CmpGT32Sx4:
2921      case Iop_CmpGT32Ux4:
2922      case Iop_CmpEQ32x4:
2923      case Iop_QAdd32Sx4:
2924      case Iop_QAdd32Ux4:
2925      case Iop_QSub32Sx4:
2926      case Iop_QSub32Ux4:
2927      case Iop_QSal32x4:
2928      case Iop_QShl32x4:
2929      case Iop_Avg32Ux4:
2930      case Iop_Avg32Sx4:
2931      case Iop_Add32x4:
2932      case Iop_Max32Ux4:
2933      case Iop_Max32Sx4:
2934      case Iop_Min32Ux4:
2935      case Iop_Min32Sx4:
2936      case Iop_Mul32x4:
2937      case Iop_QDMulHi32Sx4:
2938      case Iop_QRDMulHi32Sx4:
2939         return binary32Ix4(mce, vatom1, vatom2);
2940
2941      case Iop_Sub64x2:
2942      case Iop_Add64x2:
2943      case Iop_CmpEQ64x2:
2944      case Iop_CmpGT64Sx2:
2945      case Iop_QSal64x2:
2946      case Iop_QShl64x2:
2947      case Iop_QAdd64Ux2:
2948      case Iop_QAdd64Sx2:
2949      case Iop_QSub64Ux2:
2950      case Iop_QSub64Sx2:
2951         return binary64Ix2(mce, vatom1, vatom2);
2952
2953      case Iop_QNarrowBin32Sto16Sx8:
2954      case Iop_QNarrowBin32Uto16Ux8:
2955      case Iop_QNarrowBin32Sto16Ux8:
2956      case Iop_QNarrowBin16Sto8Sx16:
2957      case Iop_QNarrowBin16Uto8Ux16:
2958      case Iop_QNarrowBin16Sto8Ux16:
2959         return vectorNarrowBinV128(mce, op, vatom1, vatom2);
2960
2961      case Iop_Sub64Fx2:
2962      case Iop_Mul64Fx2:
2963      case Iop_Min64Fx2:
2964      case Iop_Max64Fx2:
2965      case Iop_Div64Fx2:
2966      case Iop_CmpLT64Fx2:
2967      case Iop_CmpLE64Fx2:
2968      case Iop_CmpEQ64Fx2:
2969      case Iop_CmpUN64Fx2:
2970      case Iop_Add64Fx2:
2971         return binary64Fx2(mce, vatom1, vatom2);
2972
2973      case Iop_Sub64F0x2:
2974      case Iop_Mul64F0x2:
2975      case Iop_Min64F0x2:
2976      case Iop_Max64F0x2:
2977      case Iop_Div64F0x2:
2978      case Iop_CmpLT64F0x2:
2979      case Iop_CmpLE64F0x2:
2980      case Iop_CmpEQ64F0x2:
2981      case Iop_CmpUN64F0x2:
2982      case Iop_Add64F0x2:
2983         return binary64F0x2(mce, vatom1, vatom2);
2984
2985      case Iop_Sub32Fx4:
2986      case Iop_Mul32Fx4:
2987      case Iop_Min32Fx4:
2988      case Iop_Max32Fx4:
2989      case Iop_Div32Fx4:
2990      case Iop_CmpLT32Fx4:
2991      case Iop_CmpLE32Fx4:
2992      case Iop_CmpEQ32Fx4:
2993      case Iop_CmpUN32Fx4:
2994      case Iop_CmpGT32Fx4:
2995      case Iop_CmpGE32Fx4:
2996      case Iop_Add32Fx4:
2997      case Iop_Recps32Fx4:
2998      case Iop_Rsqrts32Fx4:
2999         return binary32Fx4(mce, vatom1, vatom2);
3000
3001      case Iop_Sub32Fx2:
3002      case Iop_Mul32Fx2:
3003      case Iop_Min32Fx2:
3004      case Iop_Max32Fx2:
3005      case Iop_CmpEQ32Fx2:
3006      case Iop_CmpGT32Fx2:
3007      case Iop_CmpGE32Fx2:
3008      case Iop_Add32Fx2:
3009      case Iop_Recps32Fx2:
3010      case Iop_Rsqrts32Fx2:
3011         return binary32Fx2(mce, vatom1, vatom2);
3012
3013      case Iop_Sub32F0x4:
3014      case Iop_Mul32F0x4:
3015      case Iop_Min32F0x4:
3016      case Iop_Max32F0x4:
3017      case Iop_Div32F0x4:
3018      case Iop_CmpLT32F0x4:
3019      case Iop_CmpLE32F0x4:
3020      case Iop_CmpEQ32F0x4:
3021      case Iop_CmpUN32F0x4:
3022      case Iop_Add32F0x4:
3023         return binary32F0x4(mce, vatom1, vatom2);
3024
3025      case Iop_QShlN8Sx16:
3026      case Iop_QShlN8x16:
3027      case Iop_QSalN8x16:
3028         complainIfUndefined(mce, atom2, NULL);
3029         return mkPCast8x16(mce, vatom1);
3030
3031      case Iop_QShlN16Sx8:
3032      case Iop_QShlN16x8:
3033      case Iop_QSalN16x8:
3034         complainIfUndefined(mce, atom2, NULL);
3035         return mkPCast16x8(mce, vatom1);
3036
3037      case Iop_QShlN32Sx4:
3038      case Iop_QShlN32x4:
3039      case Iop_QSalN32x4:
3040         complainIfUndefined(mce, atom2, NULL);
3041         return mkPCast32x4(mce, vatom1);
3042
3043      case Iop_QShlN64Sx2:
3044      case Iop_QShlN64x2:
3045      case Iop_QSalN64x2:
3046         complainIfUndefined(mce, atom2, NULL);
3047         return mkPCast32x4(mce, vatom1);
3048
3049      case Iop_Mull32Sx2:
3050      case Iop_Mull32Ux2:
3051      case Iop_QDMulLong32Sx2:
3052         return vectorWidenI64(mce, Iop_Widen32Sto64x2,
3053                                    mkUifU64(mce, vatom1, vatom2));
3054
3055      case Iop_Mull16Sx4:
3056      case Iop_Mull16Ux4:
3057      case Iop_QDMulLong16Sx4:
3058         return vectorWidenI64(mce, Iop_Widen16Sto32x4,
3059                                    mkUifU64(mce, vatom1, vatom2));
3060
3061      case Iop_Mull8Sx8:
3062      case Iop_Mull8Ux8:
3063      case Iop_PolynomialMull8x8:
3064         return vectorWidenI64(mce, Iop_Widen8Sto16x8,
3065                                    mkUifU64(mce, vatom1, vatom2));
3066
3067      case Iop_PwAdd32x4:
3068         return mkPCast32x4(mce,
3069               assignNew('V', mce, Ity_V128, binop(op, mkPCast32x4(mce, vatom1),
3070                     mkPCast32x4(mce, vatom2))));
3071
3072      case Iop_PwAdd16x8:
3073         return mkPCast16x8(mce,
3074               assignNew('V', mce, Ity_V128, binop(op, mkPCast16x8(mce, vatom1),
3075                     mkPCast16x8(mce, vatom2))));
3076
3077      case Iop_PwAdd8x16:
3078         return mkPCast8x16(mce,
3079               assignNew('V', mce, Ity_V128, binop(op, mkPCast8x16(mce, vatom1),
3080                     mkPCast8x16(mce, vatom2))));
3081
3082      /* V128-bit data-steering */
3083      case Iop_SetV128lo32:
3084      case Iop_SetV128lo64:
3085      case Iop_64HLtoV128:
3086      case Iop_InterleaveLO64x2:
3087      case Iop_InterleaveLO32x4:
3088      case Iop_InterleaveLO16x8:
3089      case Iop_InterleaveLO8x16:
3090      case Iop_InterleaveHI64x2:
3091      case Iop_InterleaveHI32x4:
3092      case Iop_InterleaveHI16x8:
3093      case Iop_InterleaveHI8x16:
3094      case Iop_CatOddLanes8x16:
3095      case Iop_CatOddLanes16x8:
3096      case Iop_CatOddLanes32x4:
3097      case Iop_CatEvenLanes8x16:
3098      case Iop_CatEvenLanes16x8:
3099      case Iop_CatEvenLanes32x4:
3100      case Iop_InterleaveOddLanes8x16:
3101      case Iop_InterleaveOddLanes16x8:
3102      case Iop_InterleaveOddLanes32x4:
3103      case Iop_InterleaveEvenLanes8x16:
3104      case Iop_InterleaveEvenLanes16x8:
3105      case Iop_InterleaveEvenLanes32x4:
3106         return assignNew('V', mce, Ity_V128, binop(op, vatom1, vatom2));
3107
3108      case Iop_GetElem8x16:
3109         complainIfUndefined(mce, atom2, NULL);
3110         return assignNew('V', mce, Ity_I8, binop(op, vatom1, atom2));
3111      case Iop_GetElem16x8:
3112         complainIfUndefined(mce, atom2, NULL);
3113         return assignNew('V', mce, Ity_I16, binop(op, vatom1, atom2));
3114      case Iop_GetElem32x4:
3115         complainIfUndefined(mce, atom2, NULL);
3116         return assignNew('V', mce, Ity_I32, binop(op, vatom1, atom2));
3117      case Iop_GetElem64x2:
3118         complainIfUndefined(mce, atom2, NULL);
3119         return assignNew('V', mce, Ity_I64, binop(op, vatom1, atom2));
3120
3121      /* Perm8x16: rearrange values in left arg using steering values
3122         from right arg.  So rearrange the vbits in the same way but
3123         pessimise wrt steering values.  Perm32x4 ditto. */
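      /* For instance (a sketch): if byte lane 3 of the steering
         vector has any undefined bit, mkPCast8x16 below makes shadow
         byte 3 all 1s, so byte 3 of the final result is reported
         undefined even if the steered-in data byte was defined. */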
3124      case Iop_Perm8x16:
3125         return mkUifUV128(
3126                   mce,
3127                   assignNew('V', mce, Ity_V128, binop(op, vatom1, atom2)),
3128                   mkPCast8x16(mce, vatom2)
3129                );
3130      case Iop_Perm32x4:
3131         return mkUifUV128(
3132                   mce,
3133                   assignNew('V', mce, Ity_V128, binop(op, vatom1, atom2)),
3134                   mkPCast32x4(mce, vatom2)
3135                );
3136
3137      /* These two take the 16-bit value in the low half of each 32-bit
3138         lane, sign/zero extend it to 32, and multiply the pairs,
3139         producing a 32x4 result (and implicitly ignoring half the
3140         operand bits).  So treat it as a bunch of independent 16x8
3141         operations, but then do 32-bit shifts left-then-right to copy
3142         the low half results (which are all 0s or all 1s due to
3143         PCasting in binary16Ix8) into the upper half of each lane. */
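      /* For example (a sketch with invented vbits, not generated
         code): if one 32-bit lane of the binary16Ix8 result is
         0x0000FFFF, ie. its low 16-bit half is undefined, then
         ShlN32x4 by 16 gives 0xFFFF0000 and SarN32x4 by 16
         sign-extends it back across the lane, giving 0xFFFFFFFF --
         so the whole 32-bit result lane is marked undefined. */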
3144      case Iop_MullEven16Ux8:
3145      case Iop_MullEven16Sx8: {
3146         IRAtom* at;
3147         at = binary16Ix8(mce,vatom1,vatom2);
3148         at = assignNew('V', mce, Ity_V128, binop(Iop_ShlN32x4, at, mkU8(16)));
3149         at = assignNew('V', mce, Ity_V128, binop(Iop_SarN32x4, at, mkU8(16)));
3150         return at;
3151      }
3152
3153      /* Same deal as Iop_MullEven16{S,U}x8 */
3154      case Iop_MullEven8Ux16:
3155      case Iop_MullEven8Sx16: {
3156         IRAtom* at;
3157         at = binary8Ix16(mce,vatom1,vatom2);
3158         at = assignNew('V', mce, Ity_V128, binop(Iop_ShlN16x8, at, mkU8(8)));
3159         at = assignNew('V', mce, Ity_V128, binop(Iop_SarN16x8, at, mkU8(8)));
3160         return at;
3161      }
3162
3163      /* narrow 2xV128 into 1xV128, hi half from left arg, in a 2 x
3164         32x4 -> 16x8 laneage, discarding the upper half of each lane.
3165         Simply apply the same op to the V bits, since this is really
3166         no more than a data steering operation. */
3167      case Iop_NarrowBin32to16x8:
3168      case Iop_NarrowBin16to8x16:
3169         return assignNew('V', mce, Ity_V128,
3170                                    binop(op, vatom1, vatom2));
3171
3172      case Iop_ShrV128:
3173      case Iop_ShlV128:
3174         /* Same scheme as with all other shifts.  Note, 10 Nov 05:
3175            this is now wrong, since scalar shifts are done properly
3176            (lazily); vector shifts should be fixed likewise. */
3177         complainIfUndefined(mce, atom2, NULL);
3178         return assignNew('V', mce, Ity_V128, binop(op, vatom1, atom2));
3179
3180      /* I128-bit data-steering */
3181      case Iop_64HLto128:
3182         return assignNew('V', mce, Ity_I128, binop(op, vatom1, vatom2));
3183
3184      /* V256-bit SIMD */
3185
3186      case Iop_Add64Fx4:
3187      case Iop_Sub64Fx4:
3188      case Iop_Mul64Fx4:
3189      case Iop_Div64Fx4:
3190      case Iop_Max64Fx4:
3191      case Iop_Min64Fx4:
3192         return binary64Fx4(mce, vatom1, vatom2);
3193
3194      case Iop_Add32Fx8:
3195      case Iop_Sub32Fx8:
3196      case Iop_Mul32Fx8:
3197      case Iop_Div32Fx8:
3198      case Iop_Max32Fx8:
3199      case Iop_Min32Fx8:
3200         return binary32Fx8(mce, vatom1, vatom2);
3201
3202      /* V256-bit data-steering */
3203      case Iop_V128HLtoV256:
3204         return assignNew('V', mce, Ity_V256, binop(op, vatom1, vatom2));
3205
3206      /* Scalar floating point */
3207
3208      case Iop_F32toI64S:
3209         /* I32(rm) x F32 -> I64 */
3210         return mkLazy2(mce, Ity_I64, vatom1, vatom2);
3211
3212      case Iop_I64StoF32:
3213         /* I32(rm) x I64 -> F32 */
3214         return mkLazy2(mce, Ity_I32, vatom1, vatom2);
3215
3216      case Iop_RoundF64toInt:
3217      case Iop_RoundF64toF32:
3218      case Iop_F64toI64S:
3219      case Iop_F64toI64U:
3220      case Iop_I64StoF64:
3221      case Iop_I64UtoF64:
3222      case Iop_SinF64:
3223      case Iop_CosF64:
3224      case Iop_TanF64:
3225      case Iop_2xm1F64:
3226      case Iop_SqrtF64:
3227         /* I32(rm) x I64/F64 -> I64/F64 */
3228         return mkLazy2(mce, Ity_I64, vatom1, vatom2);
3229
3230      case Iop_ShlD64:
3231      case Iop_ShrD64:
3232      case Iop_RoundD64toInt:
3233         /* I32(DFP rm) x D64 -> D64 */
3234         return mkLazy2(mce, Ity_I64, vatom1, vatom2);
3235
3236      case Iop_ShlD128:
3237      case Iop_ShrD128:
3238      case Iop_RoundD128toInt:
3239         /* I32(DFP rm) x D128 -> D128 */
3240         return mkLazy2(mce, Ity_I128, vatom1, vatom2);
3241
3242      case Iop_D64toI64S:
3243      case Iop_I64StoD64:
3244         /* I64(DFP rm) x D64/I64 -> I64/D64 */
3245         return mkLazy2(mce, Ity_I64, vatom1, vatom2);
3246
3247      case Iop_RoundF32toInt:
3248      case Iop_SqrtF32:
3249         /* I32(rm) x I32/F32 -> I32/F32 */
3250         return mkLazy2(mce, Ity_I32, vatom1, vatom2);
3251
3252      case Iop_SqrtF128:
3253         /* I32(rm) x F128 -> F128 */
3254         return mkLazy2(mce, Ity_I128, vatom1, vatom2);
3255
3256      case Iop_I32StoF32:
3257      case Iop_F32toI32S:
3258         /* First arg is I32 (rounding mode), second is F32/I32 (data). */
3259         return mkLazy2(mce, Ity_I32, vatom1, vatom2);
3260
3261      case Iop_F128toI32S: /* IRRoundingMode(I32) x F128 -> signed I32  */
3262      case Iop_F128toF32:  /* IRRoundingMode(I32) x F128 -> F32         */
3263         return mkLazy2(mce, Ity_I32, vatom1, vatom2);
3264
3265      case Iop_F128toI64S: /* IRRoundingMode(I32) x F128 -> signed I64  */
3266      case Iop_F128toF64:  /* IRRoundingMode(I32) x F128 -> F64         */
3267      case Iop_D128toD64:  /* IRRoundingModeDFP(I64) x D128 -> D64 */
3268      case Iop_D128toI64S: /* IRRoundingModeDFP(I64) x D128 -> signed I64  */
3269         return mkLazy2(mce, Ity_I64, vatom1, vatom2);
3270
3271      case Iop_F64HLtoF128:
3272      case Iop_D64HLtoD128:
3273         return assignNew('V', mce, Ity_I128,
3274                          binop(Iop_64HLto128, vatom1, vatom2));
3275
3276      case Iop_F64toI32U:
3277      case Iop_F64toI32S:
3278      case Iop_F64toF32:
3279      case Iop_I64UtoF32:
3280         /* First arg is I32 (rounding mode), second is F64/I64 (data). */
3281         return mkLazy2(mce, Ity_I32, vatom1, vatom2);
3282
3283      case Iop_D64toD32:
3284         /* First arg is I64 (DFP rounding mode), second is D64 (data). */
3285         return mkLazy2(mce, Ity_I64, vatom1, vatom2);
3286
3287      case Iop_F64toI16S:
3288         /* First arg is I32 (rounding mode), second is F64 (data). */
3289         return mkLazy2(mce, Ity_I16, vatom1, vatom2);
3290
3291      case Iop_InsertExpD64:
3292         /*  I64 x I64 -> D64 */
3293         return mkLazy2(mce, Ity_I64, vatom1, vatom2);
3294
3295      case Iop_InsertExpD128:
3296         /*  I64 x I128 -> D128 */
3297         return mkLazy2(mce, Ity_I128, vatom1, vatom2);
3298
3299      case Iop_CmpF32:
3300      case Iop_CmpF64:
3301      case Iop_CmpF128:
3302      case Iop_CmpD64:
3303      case Iop_CmpD128:
3304         return mkLazy2(mce, Ity_I32, vatom1, vatom2);
3305
3306      /* non-FP after here */
3307
3308      case Iop_DivModU64to32:
3309      case Iop_DivModS64to32:
3310         return mkLazy2(mce, Ity_I64, vatom1, vatom2);
3311
3312      case Iop_DivModU128to64:
3313      case Iop_DivModS128to64:
3314         return mkLazy2(mce, Ity_I128, vatom1, vatom2);
3315
3316      case Iop_16HLto32:
3317         return assignNew('V', mce, Ity_I32, binop(op, vatom1, vatom2));
3318      case Iop_32HLto64:
3319         return assignNew('V', mce, Ity_I64, binop(op, vatom1, vatom2));
3320
3321      case Iop_DivModS64to64:
3322      case Iop_MullS64:
3323      case Iop_MullU64: {
3324         IRAtom* vLo64 = mkLeft64(mce, mkUifU64(mce, vatom1,vatom2));
3325         IRAtom* vHi64 = mkPCastTo(mce, Ity_I64, vLo64);
3326         return assignNew('V', mce, Ity_I128,
3327                          binop(Iop_64HLto128, vHi64, vLo64));
3328      }
3329
3330      case Iop_MullS32:
3331      case Iop_MullU32: {
3332         IRAtom* vLo32 = mkLeft32(mce, mkUifU32(mce, vatom1,vatom2));
3333         IRAtom* vHi32 = mkPCastTo(mce, Ity_I32, vLo32);
3334         return assignNew('V', mce, Ity_I64,
3335                          binop(Iop_32HLto64, vHi32, vLo32));
3336      }
3337
3338      case Iop_MullS16:
3339      case Iop_MullU16: {
3340         IRAtom* vLo16 = mkLeft16(mce, mkUifU16(mce, vatom1,vatom2));
3341         IRAtom* vHi16 = mkPCastTo(mce, Ity_I16, vLo16);
3342         return assignNew('V', mce, Ity_I32,
3343                          binop(Iop_16HLto32, vHi16, vLo16));
3344      }
3345
3346      case Iop_MullS8:
3347      case Iop_MullU8: {
3348         IRAtom* vLo8 = mkLeft8(mce, mkUifU8(mce, vatom1,vatom2));
3349         IRAtom* vHi8 = mkPCastTo(mce, Ity_I8, vLo8);
3350         return assignNew('V', mce, Ity_I16, binop(Iop_8HLto16, vHi8, vLo8));
3351      }
3352
3353      case Iop_Sad8Ux4: /* maybe we could do better?  For now, do mkLazy2. */
3354      case Iop_DivS32:
3355      case Iop_DivU32:
3356      case Iop_DivU32E:
3357      case Iop_DivS32E:
3358      case Iop_QAdd32S: /* could probably do better */
3359      case Iop_QSub32S: /* could probably do better */
3360         return mkLazy2(mce, Ity_I32, vatom1, vatom2);
3361
3362      case Iop_DivS64:
3363      case Iop_DivU64:
3364      case Iop_DivS64E:
3365      case Iop_DivU64E:
3366         return mkLazy2(mce, Ity_I64, vatom1, vatom2);
3367
3368      case Iop_Add32:
3369         if (mce->bogusLiterals || mce->useLLVMworkarounds)
3370            return expensiveAddSub(mce,True,Ity_I32,
3371                                   vatom1,vatom2, atom1,atom2);
3372         else
3373            goto cheap_AddSub32;
3374      case Iop_Sub32:
3375         if (mce->bogusLiterals)
3376            return expensiveAddSub(mce,False,Ity_I32,
3377                                   vatom1,vatom2, atom1,atom2);
3378         else
3379            goto cheap_AddSub32;
3380
3381      cheap_AddSub32:
3382      case Iop_Mul32:
3383         return mkLeft32(mce, mkUifU32(mce, vatom1,vatom2));
3384
3385      case Iop_CmpORD32S:
3386      case Iop_CmpORD32U:
3387      case Iop_CmpORD64S:
3388      case Iop_CmpORD64U:
3389         return doCmpORD(mce, op, vatom1,vatom2, atom1,atom2);
3390
3391      case Iop_Add64:
3392         if (mce->bogusLiterals || mce->useLLVMworkarounds)
3393            return expensiveAddSub(mce,True,Ity_I64,
3394                                   vatom1,vatom2, atom1,atom2);
3395         else
3396            goto cheap_AddSub64;
3397      case Iop_Sub64:
3398         if (mce->bogusLiterals)
3399            return expensiveAddSub(mce,False,Ity_I64,
3400                                   vatom1,vatom2, atom1,atom2);
3401         else
3402            goto cheap_AddSub64;
3403
3404      cheap_AddSub64:
3405      case Iop_Mul64:
3406         return mkLeft64(mce, mkUifU64(mce, vatom1,vatom2));
3407
3408      case Iop_Mul16:
3409      case Iop_Add16:
3410      case Iop_Sub16:
3411         return mkLeft16(mce, mkUifU16(mce, vatom1,vatom2));
3412
3413      case Iop_Sub8:
3414      case Iop_Add8:
3415         return mkLeft8(mce, mkUifU8(mce, vatom1,vatom2));
3416
3417      case Iop_CmpEQ64:
3418      case Iop_CmpNE64:
3419         if (mce->bogusLiterals)
3420            return expensiveCmpEQorNE(mce,Ity_I64, vatom1,vatom2, atom1,atom2 );
3421         else
3422            goto cheap_cmp64;
3423      cheap_cmp64:
3424      case Iop_CmpLE64S: case Iop_CmpLE64U:
3425      case Iop_CmpLT64U: case Iop_CmpLT64S:
3426         return mkPCastTo(mce, Ity_I1, mkUifU64(mce, vatom1,vatom2));
3427
3428      case Iop_CmpEQ32:
3429      case Iop_CmpNE32:
3430         if (mce->bogusLiterals)
3431            return expensiveCmpEQorNE(mce,Ity_I32, vatom1,vatom2, atom1,atom2 );
3432         else
3433            goto cheap_cmp32;
3434      cheap_cmp32:
3435      case Iop_CmpLE32S: case Iop_CmpLE32U:
3436      case Iop_CmpLT32U: case Iop_CmpLT32S:
3437         return mkPCastTo(mce, Ity_I1, mkUifU32(mce, vatom1,vatom2));
3438
3439      case Iop_CmpEQ16: case Iop_CmpNE16:
3440         return mkPCastTo(mce, Ity_I1, mkUifU16(mce, vatom1,vatom2));
3441
3442      case Iop_CmpEQ8: case Iop_CmpNE8:
3443         return mkPCastTo(mce, Ity_I1, mkUifU8(mce, vatom1,vatom2));
3444
3445      case Iop_CasCmpEQ8:  case Iop_CasCmpNE8:
3446      case Iop_CasCmpEQ16: case Iop_CasCmpNE16:
3447      case Iop_CasCmpEQ32: case Iop_CasCmpNE32:
3448      case Iop_CasCmpEQ64: case Iop_CasCmpNE64:
3449         /* Just say these all produce a defined result, regardless
3450            of their arguments.  See COMMENT_ON_CasCmpEQ in this file. */
3451         return assignNew('V', mce, Ity_I1, definedOfType(Ity_I1));
3452
3453      case Iop_Shl64: case Iop_Shr64: case Iop_Sar64:
3454         return scalarShift( mce, Ity_I64, op, vatom1,vatom2, atom1,atom2 );
3455
3456      case Iop_Shl32: case Iop_Shr32: case Iop_Sar32:
3457         return scalarShift( mce, Ity_I32, op, vatom1,vatom2, atom1,atom2 );
3458
3459      case Iop_Shl16: case Iop_Shr16: case Iop_Sar16:
3460         return scalarShift( mce, Ity_I16, op, vatom1,vatom2, atom1,atom2 );
3461
3462      case Iop_Shl8: case Iop_Shr8:
3463         return scalarShift( mce, Ity_I8, op, vatom1,vatom2, atom1,atom2 );
3464
3465      case Iop_AndV256:
3466         uifu = mkUifUV256; difd = mkDifDV256;
3467         and_or_ty = Ity_V256; improve = mkImproveANDV256; goto do_And_Or;
3468      case Iop_AndV128:
3469         uifu = mkUifUV128; difd = mkDifDV128;
3470         and_or_ty = Ity_V128; improve = mkImproveANDV128; goto do_And_Or;
3471      case Iop_And64:
3472         uifu = mkUifU64; difd = mkDifD64;
3473         and_or_ty = Ity_I64; improve = mkImproveAND64; goto do_And_Or;
3474      case Iop_And32:
3475         uifu = mkUifU32; difd = mkDifD32;
3476         and_or_ty = Ity_I32; improve = mkImproveAND32; goto do_And_Or;
3477      case Iop_And16:
3478         uifu = mkUifU16; difd = mkDifD16;
3479         and_or_ty = Ity_I16; improve = mkImproveAND16; goto do_And_Or;
3480      case Iop_And8:
3481         uifu = mkUifU8; difd = mkDifD8;
3482         and_or_ty = Ity_I8; improve = mkImproveAND8; goto do_And_Or;
3483
3484      case Iop_OrV256:
3485         uifu = mkUifUV256; difd = mkDifDV256;
3486         and_or_ty = Ity_V256; improve = mkImproveORV256; goto do_And_Or;
3487      case Iop_OrV128:
3488         uifu = mkUifUV128; difd = mkDifDV128;
3489         and_or_ty = Ity_V128; improve = mkImproveORV128; goto do_And_Or;
3490      case Iop_Or64:
3491         uifu = mkUifU64; difd = mkDifD64;
3492         and_or_ty = Ity_I64; improve = mkImproveOR64; goto do_And_Or;
3493      case Iop_Or32:
3494         uifu = mkUifU32; difd = mkDifD32;
3495         and_or_ty = Ity_I32; improve = mkImproveOR32; goto do_And_Or;
3496      case Iop_Or16:
3497         uifu = mkUifU16; difd = mkDifD16;
3498         and_or_ty = Ity_I16; improve = mkImproveOR16; goto do_And_Or;
3499      case Iop_Or8:
3500         uifu = mkUifU8; difd = mkDifD8;
3501         and_or_ty = Ity_I8; improve = mkImproveOR8; goto do_And_Or;
3502
3503      do_And_Or:
3504         return
3505         assignNew(
3506            'V', mce,
3507            and_or_ty,
3508            difd(mce, uifu(mce, vatom1, vatom2),
3509                      difd(mce, improve(mce, atom1, vatom1),
3510                                improve(mce, atom2, vatom2) ) ) );
3511
3512      case Iop_Xor8:
3513         return mkUifU8(mce, vatom1, vatom2);
3514      case Iop_Xor16:
3515         return mkUifU16(mce, vatom1, vatom2);
3516      case Iop_Xor32:
3517         return mkUifU32(mce, vatom1, vatom2);
3518      case Iop_Xor64:
3519         return mkUifU64(mce, vatom1, vatom2);
3520      case Iop_XorV128:
3521         return mkUifUV128(mce, vatom1, vatom2);
3522      case Iop_XorV256:
3523         return mkUifUV256(mce, vatom1, vatom2);
3524
3525      default:
3526         ppIROp(op);
3527         VG_(tool_panic)("memcheck:expr2vbits_Binop");
3528   }
3529}
3530
3531
3532static
3533IRExpr* expr2vbits_Unop ( MCEnv* mce, IROp op, IRAtom* atom )
3534{
3535   IRAtom* vatom = expr2vbits( mce, atom );
3536   tl_assert(isOriginalAtom(mce,atom));
3537   switch (op) {
3538
3539      case Iop_Sqrt64Fx2:
3540         return unary64Fx2(mce, vatom);
3541
3542      case Iop_Sqrt64F0x2:
3543         return unary64F0x2(mce, vatom);
3544
3545      case Iop_Sqrt32Fx8:
3546      case Iop_RSqrt32Fx8:
3547      case Iop_Recip32Fx8:
3548         return unary32Fx8(mce, vatom);
3549
3550      case Iop_Sqrt64Fx4:
3551         return unary64Fx4(mce, vatom);
3552
3553      case Iop_Sqrt32Fx4:
3554      case Iop_RSqrt32Fx4:
3555      case Iop_Recip32Fx4:
3556      case Iop_I32UtoFx4:
3557      case Iop_I32StoFx4:
3558      case Iop_QFtoI32Ux4_RZ:
3559      case Iop_QFtoI32Sx4_RZ:
3560      case Iop_RoundF32x4_RM:
3561      case Iop_RoundF32x4_RP:
3562      case Iop_RoundF32x4_RN:
3563      case Iop_RoundF32x4_RZ:
3564      case Iop_Recip32x4:
3565      case Iop_Abs32Fx4:
3566      case Iop_Neg32Fx4:
3567      case Iop_Rsqrte32Fx4:
3568         return unary32Fx4(mce, vatom);
3569
3570      case Iop_I32UtoFx2:
3571      case Iop_I32StoFx2:
3572      case Iop_Recip32Fx2:
3573      case Iop_Recip32x2:
3574      case Iop_Abs32Fx2:
3575      case Iop_Neg32Fx2:
3576      case Iop_Rsqrte32Fx2:
3577         return unary32Fx2(mce, vatom);
3578
3579      case Iop_Sqrt32F0x4:
3580      case Iop_RSqrt32F0x4:
3581      case Iop_Recip32F0x4:
3582         return unary32F0x4(mce, vatom);
3583
3584      case Iop_32UtoV128:
3585      case Iop_64UtoV128:
3586      case Iop_Dup8x16:
3587      case Iop_Dup16x8:
3588      case Iop_Dup32x4:
3589      case Iop_Reverse16_8x16:
3590      case Iop_Reverse32_8x16:
3591      case Iop_Reverse32_16x8:
3592      case Iop_Reverse64_8x16:
3593      case Iop_Reverse64_16x8:
3594      case Iop_Reverse64_32x4:
3595      case Iop_V256toV128_1: case Iop_V256toV128_0:
3596         return assignNew('V', mce, Ity_V128, unop(op, vatom));
3597
3598      case Iop_F128HItoF64:  /* F128 -> high half of F128 */
3599      case Iop_D128HItoD64:  /* D128 -> high half of D128 */
3600         return assignNew('V', mce, Ity_I64, unop(Iop_128HIto64, vatom));
3601      case Iop_F128LOtoF64:  /* F128 -> low  half of F128 */
3602      case Iop_D128LOtoD64:  /* D128 -> low  half of D128 */
3603         return assignNew('V', mce, Ity_I64, unop(Iop_128to64, vatom));
3604
3605      case Iop_NegF128:
3606      case Iop_AbsF128:
3607         return mkPCastTo(mce, Ity_I128, vatom);
3608
3609      case Iop_I32StoF128: /* signed I32 -> F128 */
3610      case Iop_I64StoF128: /* signed I64 -> F128 */
3611      case Iop_F32toF128:  /* F32 -> F128 */
3612      case Iop_F64toF128:  /* F64 -> F128 */
3613      case Iop_I64StoD128: /* signed I64 -> D128 */
3614         return mkPCastTo(mce, Ity_I128, vatom);
3615
3616      case Iop_F32toF64:
3617      case Iop_I32StoF64:
3618      case Iop_I32UtoF64:
3619      case Iop_NegF64:
3620      case Iop_AbsF64:
3621      case Iop_Est5FRSqrt:
3622      case Iop_RoundF64toF64_NEAREST:
3623      case Iop_RoundF64toF64_NegINF:
3624      case Iop_RoundF64toF64_PosINF:
3625      case Iop_RoundF64toF64_ZERO:
3626      case Iop_Clz64:
3627      case Iop_Ctz64:
3628      case Iop_D32toD64:
3629      case Iop_ExtractExpD64:    /* D64  -> I64 */
3630      case Iop_ExtractExpD128:   /* D128 -> I64 */
3631         return mkPCastTo(mce, Ity_I64, vatom);
3632
3633      case Iop_D64toD128:
3634         return mkPCastTo(mce, Ity_I128, vatom);
3635
3636      case Iop_Clz32:
3637      case Iop_Ctz32:
3638      case Iop_TruncF64asF32:
3639      case Iop_NegF32:
3640      case Iop_AbsF32:
3641         return mkPCastTo(mce, Ity_I32, vatom);
3642
3643      case Iop_1Uto64:
3644      case Iop_1Sto64:
3645      case Iop_8Uto64:
3646      case Iop_8Sto64:
3647      case Iop_16Uto64:
3648      case Iop_16Sto64:
3649      case Iop_32Sto64:
3650      case Iop_32Uto64:
3651      case Iop_V128to64:
3652      case Iop_V128HIto64:
3653      case Iop_128HIto64:
3654      case Iop_128to64:
3655      case Iop_Dup8x8:
3656      case Iop_Dup16x4:
3657      case Iop_Dup32x2:
3658      case Iop_Reverse16_8x8:
3659      case Iop_Reverse32_8x8:
3660      case Iop_Reverse32_16x4:
3661      case Iop_Reverse64_8x8:
3662      case Iop_Reverse64_16x4:
3663      case Iop_Reverse64_32x2:
3664      case Iop_V256to64_0: case Iop_V256to64_1:
3665      case Iop_V256to64_2: case Iop_V256to64_3:
3666         return assignNew('V', mce, Ity_I64, unop(op, vatom));
3667
3668      case Iop_64to32:
3669      case Iop_64HIto32:
3670      case Iop_1Uto32:
3671      case Iop_1Sto32:
3672      case Iop_8Uto32:
3673      case Iop_16Uto32:
3674      case Iop_16Sto32:
3675      case Iop_8Sto32:
3676      case Iop_V128to32:
3677         return assignNew('V', mce, Ity_I32, unop(op, vatom));
3678
3679      case Iop_8Sto16:
3680      case Iop_8Uto16:
3681      case Iop_32to16:
3682      case Iop_32HIto16:
3683      case Iop_64to16:
3684         return assignNew('V', mce, Ity_I16, unop(op, vatom));
3685
3686      case Iop_1Uto8:
3687      case Iop_1Sto8:
3688      case Iop_16to8:
3689      case Iop_16HIto8:
3690      case Iop_32to8:
3691      case Iop_64to8:
3692         return assignNew('V', mce, Ity_I8, unop(op, vatom));
3693
3694      case Iop_32to1:
3695         return assignNew('V', mce, Ity_I1, unop(Iop_32to1, vatom));
3696
3697      case Iop_64to1:
3698         return assignNew('V', mce, Ity_I1, unop(Iop_64to1, vatom));
3699
3700      case Iop_ReinterpF64asI64:
3701      case Iop_ReinterpI64asF64:
3702      case Iop_ReinterpI32asF32:
3703      case Iop_ReinterpF32asI32:
3704      case Iop_ReinterpI64asD64:
3705      case Iop_ReinterpD64asI64:
3706      case Iop_DPBtoBCD:
3707      case Iop_BCDtoDPB:
3708      case Iop_NotV256:
3709      case Iop_NotV128:
3710      case Iop_Not64:
3711      case Iop_Not32:
3712      case Iop_Not16:
3713      case Iop_Not8:
3714      case Iop_Not1:
3715         return vatom;
3716
3717      case Iop_CmpNEZ8x8:
3718      case Iop_Cnt8x8:
3719      case Iop_Clz8Sx8:
3720      case Iop_Cls8Sx8:
3721      case Iop_Abs8x8:
3722         return mkPCast8x8(mce, vatom);
3723
3724      case Iop_CmpNEZ8x16:
3725      case Iop_Cnt8x16:
3726      case Iop_Clz8Sx16:
3727      case Iop_Cls8Sx16:
3728      case Iop_Abs8x16:
3729         return mkPCast8x16(mce, vatom);
3730
3731      case Iop_CmpNEZ16x4:
3732      case Iop_Clz16Sx4:
3733      case Iop_Cls16Sx4:
3734      case Iop_Abs16x4:
3735         return mkPCast16x4(mce, vatom);
3736
3737      case Iop_CmpNEZ16x8:
3738      case Iop_Clz16Sx8:
3739      case Iop_Cls16Sx8:
3740      case Iop_Abs16x8:
3741         return mkPCast16x8(mce, vatom);
3742
3743      case Iop_CmpNEZ32x2:
3744      case Iop_Clz32Sx2:
3745      case Iop_Cls32Sx2:
3746      case Iop_FtoI32Ux2_RZ:
3747      case Iop_FtoI32Sx2_RZ:
3748      case Iop_Abs32x2:
3749         return mkPCast32x2(mce, vatom);
3750
3751      case Iop_CmpNEZ32x4:
3752      case Iop_Clz32Sx4:
3753      case Iop_Cls32Sx4:
3754      case Iop_FtoI32Ux4_RZ:
3755      case Iop_FtoI32Sx4_RZ:
3756      case Iop_Abs32x4:
3757         return mkPCast32x4(mce, vatom);
3758
3759      case Iop_CmpwNEZ64:
3760         return mkPCastTo(mce, Ity_I64, vatom);
3761
3762      case Iop_CmpNEZ64x2:
3763         return mkPCast64x2(mce, vatom);
3764
3765      case Iop_NarrowUn16to8x8:
3766      case Iop_NarrowUn32to16x4:
3767      case Iop_NarrowUn64to32x2:
3768      case Iop_QNarrowUn16Sto8Sx8:
3769      case Iop_QNarrowUn16Sto8Ux8:
3770      case Iop_QNarrowUn16Uto8Ux8:
3771      case Iop_QNarrowUn32Sto16Sx4:
3772      case Iop_QNarrowUn32Sto16Ux4:
3773      case Iop_QNarrowUn32Uto16Ux4:
3774      case Iop_QNarrowUn64Sto32Sx2:
3775      case Iop_QNarrowUn64Sto32Ux2:
3776      case Iop_QNarrowUn64Uto32Ux2:
3777         return vectorNarrowUnV128(mce, op, vatom);
3778
3779      case Iop_Widen8Sto16x8:
3780      case Iop_Widen8Uto16x8:
3781      case Iop_Widen16Sto32x4:
3782      case Iop_Widen16Uto32x4:
3783      case Iop_Widen32Sto64x2:
3784      case Iop_Widen32Uto64x2:
3785         return vectorWidenI64(mce, op, vatom);
3786
3787      case Iop_PwAddL32Ux2:
3788      case Iop_PwAddL32Sx2:
3789         return mkPCastTo(mce, Ity_I64,
3790               assignNew('V', mce, Ity_I64, unop(op, mkPCast32x2(mce, vatom))));
3791
3792      case Iop_PwAddL16Ux4:
3793      case Iop_PwAddL16Sx4:
3794         return mkPCast32x2(mce,
3795               assignNew('V', mce, Ity_I64, unop(op, mkPCast16x4(mce, vatom))));
3796
3797      case Iop_PwAddL8Ux8:
3798      case Iop_PwAddL8Sx8:
3799         return mkPCast16x4(mce,
3800               assignNew('V', mce, Ity_I64, unop(op, mkPCast8x8(mce, vatom))));
3801
3802      case Iop_PwAddL32Ux4:
3803      case Iop_PwAddL32Sx4:
3804         return mkPCast64x2(mce,
3805               assignNew('V', mce, Ity_V128, unop(op, mkPCast32x4(mce, vatom))));
3806
3807      case Iop_PwAddL16Ux8:
3808      case Iop_PwAddL16Sx8:
3809         return mkPCast32x4(mce,
3810               assignNew('V', mce, Ity_V128, unop(op, mkPCast16x8(mce, vatom))));
3811
3812      case Iop_PwAddL8Ux16:
3813      case Iop_PwAddL8Sx16:
3814         return mkPCast16x8(mce,
3815               assignNew('V', mce, Ity_V128, unop(op, mkPCast8x16(mce, vatom))));
3816
3817      case Iop_I64UtoF32:
3818      default:
3819         ppIROp(op);
3820         VG_(tool_panic)("memcheck:expr2vbits_Unop");
3821   }
3822}
3823
3824
3825/* Worker function; do not call directly. */
3826static
3827IRAtom* expr2vbits_Load_WRK ( MCEnv* mce,
3828                              IREndness end, IRType ty,
3829                              IRAtom* addr, UInt bias )
3830{
3831   void*    helper;
3832   Char*    hname;
3833   IRDirty* di;
3834   IRTemp   datavbits;
3835   IRAtom*  addrAct;
3836
3837   tl_assert(isOriginalAtom(mce,addr));
3838   tl_assert(end == Iend_LE || end == Iend_BE);
3839
3840   /* First, emit a definedness test for the address.  This also sets
3841      the address (shadow) to 'defined' following the test. */
3842   complainIfUndefined( mce, addr, NULL );
3843
3844   /* Now cook up a call to the relevant helper function, to read the
3845      data V bits from shadow memory. */
3846   ty = shadowTypeV(ty);
3847
3848   if (end == Iend_LE) {
3849      switch (ty) {
3850         case Ity_I64: helper = &MC_(helperc_LOADV64le);
3851                       hname = "MC_(helperc_LOADV64le)";
3852                       break;
3853         case Ity_I32: helper = &MC_(helperc_LOADV32le);
3854                       hname = "MC_(helperc_LOADV32le)";
3855                       break;
3856         case Ity_I16: helper = &MC_(helperc_LOADV16le);
3857                       hname = "MC_(helperc_LOADV16le)";
3858                       break;
3859         case Ity_I8:  helper = &MC_(helperc_LOADV8);
3860                       hname = "MC_(helperc_LOADV8)";
3861                       break;
3862         default:      ppIRType(ty);
3863                       VG_(tool_panic)("memcheck:do_shadow_Load(LE)");
3864      }
3865   } else {
3866      switch (ty) {
3867         case Ity_I64: helper = &MC_(helperc_LOADV64be);
3868                       hname = "MC_(helperc_LOADV64be)";
3869                       break;
3870         case Ity_I32: helper = &MC_(helperc_LOADV32be);
3871                       hname = "MC_(helperc_LOADV32be)";
3872                       break;
3873         case Ity_I16: helper = &MC_(helperc_LOADV16be);
3874                       hname = "MC_(helperc_LOADV16be)";
3875                       break;
3876         case Ity_I8:  helper = &MC_(helperc_LOADV8);
3877                       hname = "MC_(helperc_LOADV8)";
3878                       break;
3879         default:      ppIRType(ty);
3880                       VG_(tool_panic)("memcheck:do_shadow_Load(BE)");
3881      }
3882   }
3883
3884   /* Generate the actual address into addrAct. */
3885   if (bias == 0) {
3886      addrAct = addr;
3887   } else {
3888      IROp    mkAdd;
3889      IRAtom* eBias;
3890      IRType  tyAddr  = mce->hWordTy;
3891      tl_assert( tyAddr == Ity_I32 || tyAddr == Ity_I64 );
3892      mkAdd   = tyAddr==Ity_I32 ? Iop_Add32 : Iop_Add64;
3893      eBias   = tyAddr==Ity_I32 ? mkU32(bias) : mkU64(bias);
3894      addrAct = assignNew('V', mce, tyAddr, binop(mkAdd, addr, eBias) );
3895   }
3896
3897   /* We need to have a place to park the V bits we're just about to
3898      read. */
3899   datavbits = newTemp(mce, ty, VSh);
3900   di = unsafeIRDirty_1_N( datavbits,
3901                           1/*regparms*/,
3902                           hname, VG_(fnptr_to_fnentry)( helper ),
3903                           mkIRExprVec_1( addrAct ));
3904   setHelperAnns( mce, di );
3905   stmt( 'V', mce, IRStmt_Dirty(di) );
3906
3907   return mkexpr(datavbits);
3908}
3909
3910
3911static
3912IRAtom* expr2vbits_Load ( MCEnv* mce,
3913                          IREndness end, IRType ty,
3914                          IRAtom* addr, UInt bias )
3915{
3916   tl_assert(end == Iend_LE || end == Iend_BE);
3917   switch (shadowTypeV(ty)) {
3918      case Ity_I8:
3919      case Ity_I16:
3920      case Ity_I32:
3921      case Ity_I64:
3922         return expr2vbits_Load_WRK(mce, end, ty, addr, bias);
3923      case Ity_V128: {
3924         IRAtom *v64hi, *v64lo;
3925         if (end == Iend_LE) {
3926            v64lo = expr2vbits_Load_WRK(mce, end, Ity_I64, addr, bias+0);
3927            v64hi = expr2vbits_Load_WRK(mce, end, Ity_I64, addr, bias+8);
3928         } else {
3929            v64hi = expr2vbits_Load_WRK(mce, end, Ity_I64, addr, bias+0);
3930            v64lo = expr2vbits_Load_WRK(mce, end, Ity_I64, addr, bias+8);
3931         }
3932         return assignNew( 'V', mce,
3933                           Ity_V128,
3934                           binop(Iop_64HLtoV128, v64hi, v64lo));
3935      }
3936      case Ity_V256: {
3937         /* V256-bit case -- phrased in terms of 64 bit units (Qs),
3938            with Q3 being the most significant lane. */
3939         if (end == Iend_BE) goto unhandled;
3940         IRAtom* v64Q0 = expr2vbits_Load_WRK(mce, end, Ity_I64, addr, bias+0);
3941         IRAtom* v64Q1 = expr2vbits_Load_WRK(mce, end, Ity_I64, addr, bias+8);
3942         IRAtom* v64Q2 = expr2vbits_Load_WRK(mce, end, Ity_I64, addr, bias+16);
3943         IRAtom* v64Q3 = expr2vbits_Load_WRK(mce, end, Ity_I64, addr, bias+24);
3944         return assignNew( 'V', mce,
3945                           Ity_V256,
3946                           IRExpr_Qop(Iop_64x4toV256,
3947                                      v64Q3, v64Q2, v64Q1, v64Q0));
3948      }
3949      unhandled:
3950      default:
3951         VG_(tool_panic)("expr2vbits_Load");
3952   }
3953}
3954
3955
3956/* If there is no guard expression, or the guard is always TRUE, this
3957   function behaves like expr2vbits_Load.  If the guard is not true at
3958   runtime, an all-bits-defined bit pattern is returned instead.
3959   It is assumed that the definedness of GUARD has already been checked
3960   at the call site. */
3961static
3962IRAtom* expr2vbits_guarded_Load ( MCEnv* mce,
3963                                  IREndness end, IRType ty,
3964                                  IRAtom* addr, UInt bias, IRAtom *guard )
3965{
3966   if (guard) {
3967      IRAtom *cond, *iffalse, *iftrue;
3968
3969      cond    = assignNew('V', mce, Ity_I8, unop(Iop_1Uto8, guard));
3970      iftrue  = assignNew('V', mce, ty,
3971                          expr2vbits_Load(mce, end, ty, addr, bias));
3972      iffalse = assignNew('V', mce, ty, definedOfType(ty));
3973
3974      return assignNew('V', mce, ty, IRExpr_Mux0X(cond, iffalse, iftrue));
3975   }
3976
3977   /* No guard expression or unconditional load */
3978   return expr2vbits_Load(mce, end, ty, addr, bias);
3979}
3980
3981
3982static
3983IRAtom* expr2vbits_Mux0X ( MCEnv* mce,
3984                           IRAtom* cond, IRAtom* expr0, IRAtom* exprX )
3985{
3986   IRAtom *vbitsC, *vbits0, *vbitsX;
3987   IRType ty;
3988   /* Given Mux0X(cond,expr0,exprX), generate
3989         Mux0X(cond,expr0#,exprX#) `UifU` PCast(cond#)
3990      That is, steer the V bits like the originals, but trash the
3991      result if the steering value is undefined.  This gives
3992      lazy propagation. */
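   /* A concrete illustration (a sketch, not generated verbatim): for a
      32-bit Mux0X, if cond# is all zeroes (cond fully defined), then
      PCast(cond#) is 0x00000000 and the UifU leaves the muxed shadow
      value unchanged; if any bit of cond# is 1, PCast(cond#) is
      0xFFFFFFFF and the entire result shadow becomes undefined,
      regardless of expr0# and exprX#. */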
3993   tl_assert(isOriginalAtom(mce, cond));
3994   tl_assert(isOriginalAtom(mce, expr0));
3995   tl_assert(isOriginalAtom(mce, exprX));
3996
3997   vbitsC = expr2vbits(mce, cond);
3998   vbits0 = expr2vbits(mce, expr0);
3999   vbitsX = expr2vbits(mce, exprX);
4000   ty = typeOfIRExpr(mce->sb->tyenv, vbits0);
4001
4002   return
4003      mkUifU(mce, ty, assignNew('V', mce, ty,
4004                                     IRExpr_Mux0X(cond, vbits0, vbitsX)),
4005                      mkPCastTo(mce, ty, vbitsC) );
4006}
4007
4008/* --------- This is the main expression-handling function. --------- */
4009
4010static
4011IRExpr* expr2vbits ( MCEnv* mce, IRExpr* e )
4012{
4013   switch (e->tag) {
4014
4015      case Iex_Get:
4016         return shadow_GET( mce, e->Iex.Get.offset, e->Iex.Get.ty );
4017
4018      case Iex_GetI:
4019         return shadow_GETI( mce, e->Iex.GetI.descr,
4020                                  e->Iex.GetI.ix, e->Iex.GetI.bias );
4021
4022      case Iex_RdTmp:
4023         return IRExpr_RdTmp( findShadowTmpV(mce, e->Iex.RdTmp.tmp) );
4024
4025      case Iex_Const:
4026         return definedOfType(shadowTypeV(typeOfIRExpr(mce->sb->tyenv, e)));
4027
4028      case Iex_Qop:
4029         return expr2vbits_Qop(
4030                   mce,
4031                   e->Iex.Qop.details->op,
4032                   e->Iex.Qop.details->arg1, e->Iex.Qop.details->arg2,
4033                   e->Iex.Qop.details->arg3, e->Iex.Qop.details->arg4
4034                );
4035
4036      case Iex_Triop:
4037         return expr2vbits_Triop(
4038                   mce,
4039                   e->Iex.Triop.details->op,
4040                   e->Iex.Triop.details->arg1, e->Iex.Triop.details->arg2,
4041                   e->Iex.Triop.details->arg3
4042                );
4043
4044      case Iex_Binop:
4045         return expr2vbits_Binop(
4046                   mce,
4047                   e->Iex.Binop.op,
4048                   e->Iex.Binop.arg1, e->Iex.Binop.arg2
4049                );
4050
4051      case Iex_Unop:
4052         return expr2vbits_Unop( mce, e->Iex.Unop.op, e->Iex.Unop.arg );
4053
4054      case Iex_Load:
4055         return expr2vbits_Load( mce, e->Iex.Load.end,
4056                                      e->Iex.Load.ty,
4057                                      e->Iex.Load.addr, 0/*addr bias*/ );
4058
4059      case Iex_CCall:
4060         return mkLazyN( mce, e->Iex.CCall.args,
4061                              e->Iex.CCall.retty,
4062                              e->Iex.CCall.cee );
4063
4064      case Iex_Mux0X:
4065         return expr2vbits_Mux0X( mce, e->Iex.Mux0X.cond, e->Iex.Mux0X.expr0,
4066                                       e->Iex.Mux0X.exprX);
4067
4068      default:
4069         VG_(printf)("\n");
4070         ppIRExpr(e);
4071         VG_(printf)("\n");
4072         VG_(tool_panic)("memcheck: expr2vbits");
4073   }
4074}
4075
4076/*------------------------------------------------------------*/
4077/*--- Generate shadow stmts from all kinds of IRStmts.     ---*/
4078/*------------------------------------------------------------*/
4079
4080/* Widen a value to the host word size. */
4081
4082static
4083IRExpr* zwidenToHostWord ( MCEnv* mce, IRAtom* vatom )
4084{
4085   IRType ty, tyH;
4086
4087   /* vatom is a vbits-value and as such can only have a shadow type. */
4088   tl_assert(isShadowAtom(mce,vatom));
4089
4090   ty  = typeOfIRExpr(mce->sb->tyenv, vatom);
4091   tyH = mce->hWordTy;
4092
4093   if (tyH == Ity_I32) {
4094      switch (ty) {
4095         case Ity_I32:
4096            return vatom;
4097         case Ity_I16:
4098            return assignNew('V', mce, tyH, unop(Iop_16Uto32, vatom));
4099         case Ity_I8:
4100            return assignNew('V', mce, tyH, unop(Iop_8Uto32, vatom));
4101         default:
4102            goto unhandled;
4103      }
4104   } else
4105   if (tyH == Ity_I64) {
4106      switch (ty) {
4107         case Ity_I32:
4108            return assignNew('V', mce, tyH, unop(Iop_32Uto64, vatom));
4109         case Ity_I16:
4110            return assignNew('V', mce, tyH, unop(Iop_32Uto64,
4111                   assignNew('V', mce, Ity_I32, unop(Iop_16Uto32, vatom))));
4112         case Ity_I8:
4113            return assignNew('V', mce, tyH, unop(Iop_32Uto64,
4114                   assignNew('V', mce, Ity_I32, unop(Iop_8Uto32, vatom))));
4115         default:
4116            goto unhandled;
4117      }
4118   } else {
4119      goto unhandled;
4120   }
4121  unhandled:
4122   VG_(printf)("\nty = "); ppIRType(ty); VG_(printf)("\n");
4123   VG_(tool_panic)("zwidenToHostWord");
4124}
4125
4126
4127/* Generate a shadow store.  addr is always the original address atom.
4128   You can pass in either originals or V-bits for the data atom, but
4129   obviously not both.  guard :: Ity_I1 controls whether the store
4130   really happens; NULL means it unconditionally does.  Note that
4131   guard itself is not checked for definedness; the caller of this
4132   function must do that if necessary. */
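/* Typical usage (a sketch, not a verbatim quote of the call sites): the
   handler for a plain IRStmt_Store passes the original data and a null
   vdata and guard, roughly

      do_shadow_Store( mce, st->Ist.Store.end, st->Ist.Store.addr,
                       0, st->Ist.Store.data, NULL, NULL );

   whereas callers that have already computed the V bits themselves
   (eg, the dirty-call and CAS handlers) pass data == NULL and supply
   vdata instead. */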
4133
4134static
4135void do_shadow_Store ( MCEnv* mce,
4136                       IREndness end,
4137                       IRAtom* addr, UInt bias,
4138                       IRAtom* data, IRAtom* vdata,
4139                       IRAtom* guard )
4140{
4141   IROp     mkAdd;
4142   IRType   ty, tyAddr;
4143   void*    helper = NULL;
4144   Char*    hname = NULL;
4145   IRConst* c;
4146
4147   tyAddr = mce->hWordTy;
4148   mkAdd  = tyAddr==Ity_I32 ? Iop_Add32 : Iop_Add64;
4149   tl_assert( tyAddr == Ity_I32 || tyAddr == Ity_I64 );
4150   tl_assert( end == Iend_LE || end == Iend_BE );
4151
4152   if (data) {
4153      tl_assert(!vdata);
4154      tl_assert(isOriginalAtom(mce, data));
4155      tl_assert(bias == 0);
4156      vdata = expr2vbits( mce, data );
4157   } else {
4158      tl_assert(vdata);
4159   }
4160
4161   tl_assert(isOriginalAtom(mce,addr));
4162   tl_assert(isShadowAtom(mce,vdata));
4163
4164   if (guard) {
4165      tl_assert(isOriginalAtom(mce, guard));
4166      tl_assert(typeOfIRExpr(mce->sb->tyenv, guard) == Ity_I1);
4167   }
4168
4169   ty = typeOfIRExpr(mce->sb->tyenv, vdata);
4170
4171   // If we're not doing undefined value checking, pretend that this value
4172   // is "all valid".  That lets Vex's optimiser remove some of the V bit
4173   // shadow computation ops that precede it.
4174   if (MC_(clo_mc_level) == 1) {
4175      switch (ty) {
4176         case Ity_V256: // V256 weirdness -- used four times
4177                        c = IRConst_V256(V_BITS32_DEFINED); break;
4178         case Ity_V128: // V128 weirdness -- used twice
4179                        c = IRConst_V128(V_BITS16_DEFINED); break;
4180         case Ity_I64:  c = IRConst_U64 (V_BITS64_DEFINED); break;
4181         case Ity_I32:  c = IRConst_U32 (V_BITS32_DEFINED); break;
4182         case Ity_I16:  c = IRConst_U16 (V_BITS16_DEFINED); break;
4183         case Ity_I8:   c = IRConst_U8  (V_BITS8_DEFINED);  break;
4184         default:       VG_(tool_panic)("memcheck:do_shadow_Store(LE)");
4185      }
4186      vdata = IRExpr_Const( c );
4187   }
4188
4189   /* First, emit a definedness test for the address.  This also sets
4190      the address (shadow) to 'defined' following the test. */
4191   complainIfUndefined( mce, addr, guard );
4192
4193   /* Now decide which helper function to call to write the data V
4194      bits into shadow memory. */
4195   if (end == Iend_LE) {
4196      switch (ty) {
4197         case Ity_V256: /* we'll use the helper four times */
4198         case Ity_V128: /* we'll use the helper twice */
4199         case Ity_I64: helper = &MC_(helperc_STOREV64le);
4200                       hname = "MC_(helperc_STOREV64le)";
4201                       break;
4202         case Ity_I32: helper = &MC_(helperc_STOREV32le);
4203                       hname = "MC_(helperc_STOREV32le)";
4204                       break;
4205         case Ity_I16: helper = &MC_(helperc_STOREV16le);
4206                       hname = "MC_(helperc_STOREV16le)";
4207                       break;
4208         case Ity_I8:  helper = &MC_(helperc_STOREV8);
4209                       hname = "MC_(helperc_STOREV8)";
4210                       break;
4211         default:      VG_(tool_panic)("memcheck:do_shadow_Store(LE)");
4212      }
4213   } else {
4214      switch (ty) {
4215         case Ity_V128: /* we'll use the helper twice */
4216         case Ity_I64: helper = &MC_(helperc_STOREV64be);
4217                       hname = "MC_(helperc_STOREV64be)";
4218                       break;
4219         case Ity_I32: helper = &MC_(helperc_STOREV32be);
4220                       hname = "MC_(helperc_STOREV32be)";
4221                       break;
4222         case Ity_I16: helper = &MC_(helperc_STOREV16be);
4223                       hname = "MC_(helperc_STOREV16be)";
4224                       break;
4225         case Ity_I8:  helper = &MC_(helperc_STOREV8);
4226                       hname = "MC_(helperc_STOREV8)";
4227                       break;
4228         /* Note: no V256 case here, because no big-endian target that
4229            we support has 256-bit vectors. */
4230         default:      VG_(tool_panic)("memcheck:do_shadow_Store(BE)");
4231      }
4232   }
4233
4234   if (UNLIKELY(ty == Ity_V256)) {
4235
4236      /* V256-bit case -- phrased in terms of 64 bit units (Qs), with
4237         Q3 being the most significant lane. */
4238      /* These are the offsets of the Qs in memory. */
4239      Int     offQ0, offQ1, offQ2, offQ3;
4240
4241      /* Various bits for constructing the 4 lane helper calls */
4242      IRDirty *diQ0,    *diQ1,    *diQ2,    *diQ3;
4243      IRAtom  *addrQ0,  *addrQ1,  *addrQ2,  *addrQ3;
4244      IRAtom  *vdataQ0, *vdataQ1, *vdataQ2, *vdataQ3;
4245      IRAtom  *eBiasQ0, *eBiasQ1, *eBiasQ2, *eBiasQ3;
4246
4247      if (end == Iend_LE) {
4248         offQ0 = 0; offQ1 = 8; offQ2 = 16; offQ3 = 24;
4249      } else {
4250         offQ3 = 0; offQ2 = 8; offQ1 = 16; offQ0 = 24;
4251      }
4252
4253      eBiasQ0 = tyAddr==Ity_I32 ? mkU32(bias+offQ0) : mkU64(bias+offQ0);
4254      addrQ0  = assignNew('V', mce, tyAddr, binop(mkAdd, addr, eBiasQ0) );
4255      vdataQ0 = assignNew('V', mce, Ity_I64, unop(Iop_V256to64_0, vdata));
4256      diQ0    = unsafeIRDirty_0_N(
4257                   1/*regparms*/,
4258                   hname, VG_(fnptr_to_fnentry)( helper ),
4259                   mkIRExprVec_2( addrQ0, vdataQ0 )
4260                );
4261
4262      eBiasQ1 = tyAddr==Ity_I32 ? mkU32(bias+offQ1) : mkU64(bias+offQ1);
4263      addrQ1  = assignNew('V', mce, tyAddr, binop(mkAdd, addr, eBiasQ1) );
4264      vdataQ1 = assignNew('V', mce, Ity_I64, unop(Iop_V256to64_1, vdata));
4265      diQ1    = unsafeIRDirty_0_N(
4266                   1/*regparms*/,
4267                   hname, VG_(fnptr_to_fnentry)( helper ),
4268                   mkIRExprVec_2( addrQ1, vdataQ1 )
4269                );
4270
4271      eBiasQ2 = tyAddr==Ity_I32 ? mkU32(bias+offQ2) : mkU64(bias+offQ2);
4272      addrQ2  = assignNew('V', mce, tyAddr, binop(mkAdd, addr, eBiasQ2) );
4273      vdataQ2 = assignNew('V', mce, Ity_I64, unop(Iop_V256to64_2, vdata));
4274      diQ2    = unsafeIRDirty_0_N(
4275                   1/*regparms*/,
4276                   hname, VG_(fnptr_to_fnentry)( helper ),
4277                   mkIRExprVec_2( addrQ2, vdataQ2 )
4278                );
4279
4280      eBiasQ3 = tyAddr==Ity_I32 ? mkU32(bias+offQ3) : mkU64(bias+offQ3);
4281      addrQ3  = assignNew('V', mce, tyAddr, binop(mkAdd, addr, eBiasQ3) );
4282      vdataQ3 = assignNew('V', mce, Ity_I64, unop(Iop_V256to64_3, vdata));
4283      diQ3    = unsafeIRDirty_0_N(
4284                   1/*regparms*/,
4285                   hname, VG_(fnptr_to_fnentry)( helper ),
4286                   mkIRExprVec_2( addrQ3, vdataQ3 )
4287                );
4288
4289      if (guard)
4290         diQ0->guard = diQ1->guard = diQ2->guard = diQ3->guard = guard;
4291
4292      setHelperAnns( mce, diQ0 );
4293      setHelperAnns( mce, diQ1 );
4294      setHelperAnns( mce, diQ2 );
4295      setHelperAnns( mce, diQ3 );
4296      stmt( 'V', mce, IRStmt_Dirty(diQ0) );
4297      stmt( 'V', mce, IRStmt_Dirty(diQ1) );
4298      stmt( 'V', mce, IRStmt_Dirty(diQ2) );
4299      stmt( 'V', mce, IRStmt_Dirty(diQ3) );
4300
4301   }
4302   else if (UNLIKELY(ty == Ity_V128)) {
4303
4304      /* V128-bit case */
4305      /* See comment in next clause re 64-bit regparms */
4306      /* also, need to be careful about endianness */
4307
4308      Int     offLo64, offHi64;
4309      IRDirty *diLo64, *diHi64;
4310      IRAtom  *addrLo64, *addrHi64;
4311      IRAtom  *vdataLo64, *vdataHi64;
4312      IRAtom  *eBiasLo64, *eBiasHi64;
4313
4314      if (end == Iend_LE) {
4315         offLo64 = 0;
4316         offHi64 = 8;
4317      } else {
4318         offLo64 = 8;
4319         offHi64 = 0;
4320      }
4321
4322      eBiasLo64 = tyAddr==Ity_I32 ? mkU32(bias+offLo64) : mkU64(bias+offLo64);
4323      addrLo64  = assignNew('V', mce, tyAddr, binop(mkAdd, addr, eBiasLo64) );
4324      vdataLo64 = assignNew('V', mce, Ity_I64, unop(Iop_V128to64, vdata));
4325      diLo64    = unsafeIRDirty_0_N(
4326                     1/*regparms*/,
4327                     hname, VG_(fnptr_to_fnentry)( helper ),
4328                     mkIRExprVec_2( addrLo64, vdataLo64 )
4329                  );
4330      eBiasHi64 = tyAddr==Ity_I32 ? mkU32(bias+offHi64) : mkU64(bias+offHi64);
4331      addrHi64  = assignNew('V', mce, tyAddr, binop(mkAdd, addr, eBiasHi64) );
4332      vdataHi64 = assignNew('V', mce, Ity_I64, unop(Iop_V128HIto64, vdata));
4333      diHi64    = unsafeIRDirty_0_N(
4334                     1/*regparms*/,
4335                     hname, VG_(fnptr_to_fnentry)( helper ),
4336                     mkIRExprVec_2( addrHi64, vdataHi64 )
4337                  );
4338      if (guard) diLo64->guard = guard;
4339      if (guard) diHi64->guard = guard;
4340      setHelperAnns( mce, diLo64 );
4341      setHelperAnns( mce, diHi64 );
4342      stmt( 'V', mce, IRStmt_Dirty(diLo64) );
4343      stmt( 'V', mce, IRStmt_Dirty(diHi64) );
4344
4345   } else {
4346
4347      IRDirty *di;
4348      IRAtom  *addrAct;
4349
4350      /* 8/16/32/64-bit cases */
4351      /* Generate the actual address into addrAct. */
4352      if (bias == 0) {
4353         addrAct = addr;
4354      } else {
4355         IRAtom* eBias   = tyAddr==Ity_I32 ? mkU32(bias) : mkU64(bias);
4356         addrAct = assignNew('V', mce, tyAddr, binop(mkAdd, addr, eBias));
4357      }
4358
4359      if (ty == Ity_I64) {
4360         /* We can't do this with regparm 2 on 32-bit platforms, since
4361            the back ends aren't clever enough to handle 64-bit
4362            regparm args.  Therefore be different. */
4363         di = unsafeIRDirty_0_N(
4364                 1/*regparms*/,
4365                 hname, VG_(fnptr_to_fnentry)( helper ),
4366                 mkIRExprVec_2( addrAct, vdata )
4367              );
4368      } else {
4369         di = unsafeIRDirty_0_N(
4370                 2/*regparms*/,
4371                 hname, VG_(fnptr_to_fnentry)( helper ),
4372                 mkIRExprVec_2( addrAct,
4373                                zwidenToHostWord( mce, vdata ))
4374              );
4375      }
4376      if (guard) di->guard = guard;
4377      setHelperAnns( mce, di );
4378      stmt( 'V', mce, IRStmt_Dirty(di) );
4379   }
4380
4381}
4382
4383
4384/* Do lazy pessimistic propagation through a dirty helper call, by
4385   looking at the annotations on it.  This is the most complex part of
4386   Memcheck. */
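/* In outline (an informal sketch of what follows): the V bits of every
   unmasked argument, every guest-state slice read and every memory
   range read are each PCast-ed down to a single I32 and UifU-ed
   together into 'curr'.  'curr' is then PCast-ed back out to the
   destination temporary, to every guest-state slice written and to
   every memory range written.  Hence if any input byte is undefined,
   all of the helper's outputs are treated as entirely undefined. */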
4387
4388static IRType szToITy ( Int n )
4389{
4390   switch (n) {
4391      case 1: return Ity_I8;
4392      case 2: return Ity_I16;
4393      case 4: return Ity_I32;
4394      case 8: return Ity_I64;
4395      default: VG_(tool_panic)("szToITy(memcheck)");
4396   }
4397}
4398
4399static
4400void do_shadow_Dirty ( MCEnv* mce, IRDirty* d )
4401{
4402   Int       i, k, n, toDo, gSz, gOff;
4403   IRAtom    *src, *here, *curr;
4404   IRType    tySrc, tyDst;
4405   IRTemp    dst;
4406   IREndness end;
4407
4408   /* What's the native endianness?  We need to know this. */
4409#  if defined(VG_BIGENDIAN)
4410   end = Iend_BE;
4411#  elif defined(VG_LITTLEENDIAN)
4412   end = Iend_LE;
4413#  else
4414#    error "Unknown endianness"
4415#  endif
4416
4417   /* First check the guard. */
4418   complainIfUndefined(mce, d->guard, NULL);
4419
4420   /* Now round up all inputs and PCast over them. */
4421   curr = definedOfType(Ity_I32);
4422
4423   /* Inputs: unmasked args
4424      Note: arguments are evaluated REGARDLESS of the guard expression */
4425   for (i = 0; d->args[i]; i++) {
4426      if (d->cee->mcx_mask & (1<<i)) {
4427         /* ignore this arg */
4428      } else {
4429         here = mkPCastTo( mce, Ity_I32, expr2vbits(mce, d->args[i]) );
4430         curr = mkUifU32(mce, here, curr);
4431      }
4432   }
4433
4434   /* Inputs: guest state that we read. */
4435   for (i = 0; i < d->nFxState; i++) {
4436      tl_assert(d->fxState[i].fx != Ifx_None);
4437      if (d->fxState[i].fx == Ifx_Write)
4438         continue;
4439
4440      /* Enumerate the described state segments */
4441      for (k = 0; k < 1 + d->fxState[i].nRepeats; k++) {
4442         gOff = d->fxState[i].offset + k * d->fxState[i].repeatLen;
4443         gSz  = d->fxState[i].size;
4444
4445         /* Ignore any sections marked as 'always defined'. */
4446         if (isAlwaysDefd(mce, gOff, gSz)) {
4447            if (0)
4448            VG_(printf)("memcheck: Dirty gst: ignored off %d, sz %d\n",
4449                        gOff, gSz);
4450            continue;
4451         }
4452
4453         /* This state element is read or modified.  So we need to
4454            consider it.  If larger than 8 bytes, deal with it in
4455            8-byte chunks. */
4456         while (True) {
4457            tl_assert(gSz >= 0);
4458            if (gSz == 0) break;
4459            n = gSz <= 8 ? gSz : 8;
4460            /* update 'curr' with UifU of the state slice
4461               gOff .. gOff+n-1 */
4462            tySrc = szToITy( n );
4463
4464            /* Observe the guard expression.  If it is false, use an
4465               all-bits-defined bit pattern. */
4466            IRAtom *cond, *iffalse, *iftrue;
4467
4468            cond    = assignNew('V', mce, Ity_I8, unop(Iop_1Uto8, d->guard));
4469            iftrue  = assignNew('V', mce, tySrc, shadow_GET(mce, gOff, tySrc));
4470            iffalse = assignNew('V', mce, tySrc, definedOfType(tySrc));
4471            src     = assignNew('V', mce, tySrc,
4472                                IRExpr_Mux0X(cond, iffalse, iftrue));
4473
4474            here = mkPCastTo( mce, Ity_I32, src );
4475            curr = mkUifU32(mce, here, curr);
4476            gSz -= n;
4477            gOff += n;
4478         }
4479      }
4480   }
4481
4482   /* Inputs: memory.  First set up some info needed regardless of
4483      whether we're doing reads or writes. */
4484
4485   if (d->mFx != Ifx_None) {
4486      /* Because we may do multiple shadow loads/stores from the same
4487         base address, it's best to do a single test of its
4488         definedness right now.  Post-instrumentation optimisation
4489         should remove all but this test. */
4490      IRType tyAddr;
4491      tl_assert(d->mAddr);
4492      complainIfUndefined(mce, d->mAddr, d->guard);
4493
4494      tyAddr = typeOfIRExpr(mce->sb->tyenv, d->mAddr);
4495      tl_assert(tyAddr == Ity_I32 || tyAddr == Ity_I64);
4496      tl_assert(tyAddr == mce->hWordTy); /* not really right */
4497   }
4498
4499   /* Deal with memory inputs (reads or modifies) */
4500   if (d->mFx == Ifx_Read || d->mFx == Ifx_Modify) {
4501      toDo   = d->mSize;
4502      /* chew off 32-bit chunks.  We don't care about the endianness
4503         since it's all going to be condensed down to a single bit,
4504         but nevertheless choose an endianness which is hopefully
4505         native to the platform. */
4506      while (toDo >= 4) {
4507         here = mkPCastTo(
4508                   mce, Ity_I32,
4509                   expr2vbits_guarded_Load ( mce, end, Ity_I32, d->mAddr,
4510                                             d->mSize - toDo, d->guard )
4511                );
4512         curr = mkUifU32(mce, here, curr);
4513         toDo -= 4;
4514      }
4515      /* chew off 16-bit chunks */
4516      while (toDo >= 2) {
4517         here = mkPCastTo(
4518                   mce, Ity_I32,
4519                   expr2vbits_guarded_Load ( mce, end, Ity_I16, d->mAddr,
4520                                             d->mSize - toDo, d->guard )
4521                );
4522         curr = mkUifU32(mce, here, curr);
4523         toDo -= 2;
4524      }
4525      /* chew off the remaining 8-bit chunk, if any */
4526      if (toDo == 1) {
4527         here = mkPCastTo(
4528                   mce, Ity_I32,
4529                   expr2vbits_guarded_Load ( mce, end, Ity_I8, d->mAddr,
4530                                             d->mSize - toDo, d->guard )
4531                );
4532         curr = mkUifU32(mce, here, curr);
4533         toDo -= 1;
4534      }
4535      tl_assert(toDo == 0);
4536   }
4537
4538   /* Whew!  So curr is a 32-bit V-value summarising pessimistically
4539      all the inputs to the helper.  Now we need to re-distribute the
4540      results to all destinations. */
4541
4542   /* Outputs: the destination temporary, if there is one. */
4543   if (d->tmp != IRTemp_INVALID) {
4544      dst   = findShadowTmpV(mce, d->tmp);
4545      tyDst = typeOfIRTemp(mce->sb->tyenv, d->tmp);
4546      assign( 'V', mce, dst, mkPCastTo( mce, tyDst, curr) );
4547   }
4548
4549   /* Outputs: guest state that we write or modify. */
4550   for (i = 0; i < d->nFxState; i++) {
4551      tl_assert(d->fxState[i].fx != Ifx_None);
4552      if (d->fxState[i].fx == Ifx_Read)
4553         continue;
4554
4555      /* Enumerate the described state segments */
4556      for (k = 0; k < 1 + d->fxState[i].nRepeats; k++) {
4557         gOff = d->fxState[i].offset + k * d->fxState[i].repeatLen;
4558         gSz  = d->fxState[i].size;
4559
4560         /* Ignore any sections marked as 'always defined'. */
4561         if (isAlwaysDefd(mce, gOff, gSz))
4562            continue;
4563
4564         /* This state element is written or modified.  So we need to
4565            consider it.  If larger than 8 bytes, deal with it in
4566            8-byte chunks. */
4567         while (True) {
4568            tl_assert(gSz >= 0);
4569            if (gSz == 0) break;
4570            n = gSz <= 8 ? gSz : 8;
4571            /* Write suitably-casted 'curr' to the state slice
4572               gOff .. gOff+n-1 */
4573            tyDst = szToITy( n );
4574            do_shadow_PUT( mce, gOff,
4575                                NULL, /* original atom */
4576                                mkPCastTo( mce, tyDst, curr ), d->guard );
4577            gSz -= n;
4578            gOff += n;
4579         }
4580      }
4581   }
4582
4583   /* Outputs: memory that we write or modify.  Same comments about
4584      endianness as above apply. */
4585   if (d->mFx == Ifx_Write || d->mFx == Ifx_Modify) {
4586      toDo   = d->mSize;
4587      /* chew off 32-bit chunks */
4588      while (toDo >= 4) {
4589         do_shadow_Store( mce, end, d->mAddr, d->mSize - toDo,
4590                          NULL, /* original data */
4591                          mkPCastTo( mce, Ity_I32, curr ),
4592                          d->guard );
4593         toDo -= 4;
4594      }
4595      /* chew off 16-bit chunks */
4596      while (toDo >= 2) {
4597         do_shadow_Store( mce, end, d->mAddr, d->mSize - toDo,
4598                          NULL, /* original data */
4599                          mkPCastTo( mce, Ity_I16, curr ),
4600                          d->guard );
4601         toDo -= 2;
4602      }
4603      /* chew off the remaining 8-bit chunk, if any */
4604      if (toDo == 1) {
4605         do_shadow_Store( mce, end, d->mAddr, d->mSize - toDo,
4606                          NULL, /* original data */
4607                          mkPCastTo( mce, Ity_I8, curr ),
4608                          d->guard );
4609         toDo -= 1;
4610      }
4611      tl_assert(toDo == 0);
4612   }
4613
4614}
4615
4616
4617/* We have an ABI hint telling us that [base .. base+len-1] is to
4618   become undefined ("writable").  Generate code to call a helper to
4619   notify the A/V bit machinery of this fact.
4620
4621   We call
4622   void MC_(helperc_MAKE_STACK_UNINIT) ( Addr base, UWord len,
4623                                                    Addr nia );
4624*/
4625static
4626void do_AbiHint ( MCEnv* mce, IRExpr* base, Int len, IRExpr* nia )
4627{
4628   IRDirty* di;
4629   /* Minor optimisation: if not doing origin tracking, ignore the
4630      supplied nia and pass zero instead.  This is on the basis that
4631      MC_(helperc_MAKE_STACK_UNINIT) will ignore it anyway, and we can
4632      almost always generate a shorter instruction to put zero into a
4633      register than any other value. */
4634   if (MC_(clo_mc_level) < 3)
4635      nia = mkIRExpr_HWord(0);
4636
4637   di = unsafeIRDirty_0_N(
4638           0/*regparms*/,
4639           "MC_(helperc_MAKE_STACK_UNINIT)",
4640           VG_(fnptr_to_fnentry)( &MC_(helperc_MAKE_STACK_UNINIT) ),
4641           mkIRExprVec_3( base, mkIRExpr_HWord( (UInt)len), nia )
4642        );
4643   stmt( 'V', mce, IRStmt_Dirty(di) );
4644}
4645
4646
4647/* ------ Dealing with IRCAS (big and complex) ------ */
4648
4649/* FWDS */
4650static IRAtom* gen_load_b  ( MCEnv* mce, Int szB,
4651                             IRAtom* baseaddr, Int offset );
4652static IRAtom* gen_maxU32  ( MCEnv* mce, IRAtom* b1, IRAtom* b2 );
4653static void    gen_store_b ( MCEnv* mce, Int szB,
4654                             IRAtom* baseaddr, Int offset, IRAtom* dataB,
4655                             IRAtom* guard );
4656
4657static void do_shadow_CAS_single ( MCEnv* mce, IRCAS* cas );
4658static void do_shadow_CAS_double ( MCEnv* mce, IRCAS* cas );
4659
4660
4661/* Either ORIG and SHADOW are both IRExpr.RdTmps, or they are both
4662   IRExpr.Consts, else this asserts.  If they are both Consts, it
4663   doesn't do anything.  So that just leaves the RdTmp case.
4664
4665   In which case: this assigns the shadow value SHADOW to the IR
4666   shadow temporary associated with ORIG.  That is, ORIG, being an
4667   original temporary, will have a shadow temporary associated with
4668   it.  However, in the case envisaged here, there will so far have
4669   been no IR emitted to actually write a shadow value into that
4670   temporary.  What this routine does is to (emit IR to) copy the
4671   value in SHADOW into said temporary, so that after this call,
4672   IRExpr.RdTmps of ORIG's shadow temp will correctly pick up the
4673   value in SHADOW.
4674
4675   Point is to allow callers to compute "by hand" a shadow value for
4676   ORIG, and force it to be associated with ORIG.
4677
4678   How do we know that the shadow associated with ORIG has not so far
4679   been assigned to?  Well, we don't per se know that, but suppose it
4680   had been.  Then this routine would create a second assignment to it,
4681   and later the IR sanity checker would barf.  But that never
4682   happens.  QED.
4683*/
4684static void bind_shadow_tmp_to_orig ( UChar how,
4685                                      MCEnv* mce,
4686                                      IRAtom* orig, IRAtom* shadow )
4687{
4688   tl_assert(isOriginalAtom(mce, orig));
4689   tl_assert(isShadowAtom(mce, shadow));
4690   switch (orig->tag) {
4691      case Iex_Const:
4692         tl_assert(shadow->tag == Iex_Const);
4693         break;
4694      case Iex_RdTmp:
4695         tl_assert(shadow->tag == Iex_RdTmp);
4696         if (how == 'V') {
4697            assign('V', mce, findShadowTmpV(mce,orig->Iex.RdTmp.tmp),
4698                   shadow);
4699         } else {
4700            tl_assert(how == 'B');
4701            assign('B', mce, findShadowTmpB(mce,orig->Iex.RdTmp.tmp),
4702                   shadow);
4703         }
4704         break;
4705      default:
4706         tl_assert(0);
4707   }
4708}
4709
4710
4711static
4712void do_shadow_CAS ( MCEnv* mce, IRCAS* cas )
4713{
4714   /* Scheme is (both single- and double- cases):
4715
4716      1. fetch data#,dataB (the proposed new value)
4717
4718      2. fetch expd#,expdB (what we expect to see at the address)
4719
4720      3. check definedness of address
4721
4722      4. load old#,oldB from shadow memory; this also checks
4723         addressability of the address
4724
4725      5. the CAS itself
4726
4727      6. compute "expected == old".  See COMMENT_ON_CasCmpEQ below.
4728
4729      7. if "expected == old" (as computed by (6))
4730            store data#,dataB to shadow memory
4731
4732      Note that 5 reads 'old' but 4 reads 'old#'.  Similarly, 5 stores
4733      'data' but 7 stores 'data#'.  Hence it is possible for the
4734      shadow data to be incorrectly checked and/or updated:
4735
4736      * 7 is at least gated correctly, since the 'expected == old'
4737        condition is derived from outputs of 5.  However, the shadow
4738        write could happen too late: imagine after 5 we are
4739        descheduled, a different thread runs, writes a different
4740        (shadow) value at the address, and then we resume, hence
4741        overwriting the shadow value written by the other thread.
4742
4743      Because the original memory access is atomic, there's no way to
4744      make both the original and shadow accesses into a single atomic
4745      thing, hence this is unavoidable.
4746
4747      At least as Valgrind stands, I don't think it's a problem, since
4748      we're single threaded *and* we guarantee that there are no
4749      context switches during the execution of any specific superblock
4750      -- context switches can only happen at superblock boundaries.
4751
4752      If Valgrind ever becomes MT in the future, then it might be more
4753      of a problem.  A possible kludge would be to artificially
4754      associate a lock with the location, which we must acquire and
4755      release around the transaction as a whole.  Hmm, that probably
4756      wouldn't work properly since it only guards us against other
4757      threads doing CASs on the same location, not against other
4758      threads doing normal reads and writes.
4759
4760      ------------------------------------------------------------
4761
4762      COMMENT_ON_CasCmpEQ:
4763
4764      Note two things.  Firstly, in the sequence above, we compute
4765      "expected == old", but we don't check definedness of it.  Why
4766      not?  Also, the x86 and amd64 front ends use
4767      Iop_CasCmp{EQ,NE}{8,16,32,64} comparisons to make the equivalent
4768      determination (expected == old ?) for themselves, and we also
4769      don't check definedness for those primops; we just say that the
4770      result is defined.  Why?  Details follow.
4771
4772      x86/amd64 contains various forms of locked insns:
4773      * lock prefix before all basic arithmetic insns;
4774        eg lock xorl %reg1,(%reg2)
4775      * atomic exchange reg-mem
4776      * compare-and-swaps
4777
4778      Rather than attempt to represent them all, which would be a
4779      royal PITA, I used a result from Maurice Herlihy
4780      (http://en.wikipedia.org/wiki/Maurice_Herlihy), in which he
4781      demonstrates that compare-and-swap is a primitive more general
4782      than the other two, and so can be used to represent all of them.
4783      So the translation scheme for (eg) lock incl (%reg) is as
4784      follows:
4785
4786        again:
4787         old = * %reg
4788         new = old + 1
4789         atomically { if (* %reg == old) { * %reg = new } else { goto again } }
4790
4791      The "atomically" is the CAS bit.  The scheme is always the same:
4792      get old value from memory, compute new value, atomically stuff
4793      new value back in memory iff the old value has not changed (iow,
4794      no other thread modified it in the meantime).  If it has changed
4795      then we've been out-raced and we have to start over.
4796
4797      Now that's all very neat, but it has the bad side effect of
4798      introducing an explicit equality test into the translation.
4799      Consider the behaviour of said code on a memory location which
4800      is uninitialised.  We will wind up doing a comparison on
4801      uninitialised data, and mc duly complains.
4802
4803      What's difficult about this is, the common case is that the
4804      location is uncontended, and so we're usually comparing the same
4805      value (* %reg) with itself.  So we shouldn't complain even if it
4806      is undefined.  But mc doesn't know that.
4807
4808      My solution is to mark the == in the IR specially, so as to tell
4809      mc that it almost certainly compares a value with itself, and we
4810      should just regard the result as always defined.  Rather than
4811      add a bit to all IROps, I just cloned Iop_CmpEQ{8,16,32,64} into
4812      Iop_CasCmpEQ{8,16,32,64} so as not to disturb anything else.
4813
4814      So there's always the question of, can this give a false
4815      negative?  eg, imagine that initially, * %reg is defined; and we
4816      read that; but then in the gap between the read and the CAS, a
4817      different thread writes an undefined (and different) value at
4818      the location.  Then the CAS in this thread will fail and we will
4819      go back to "again:", but without knowing that the trip back
4820      there was based on an undefined comparison.  No matter; at least
4821      the other thread won the race and the location is correctly
4822      marked as undefined.  What if it wrote an uninitialised version
4823      of the same value that was there originally, though?
4824
4825      etc etc.  Seems like there's a small corner case in which we
4826      might lose the fact that something's defined -- we're out-raced
4827      in between the "old = * reg" and the "atomically {", _and_ the
4828      other thread is writing in an undefined version of what's
4829      already there.  Well, that seems pretty unlikely.
4830
4831      ---
4832
4833      If we ever need to reinstate it .. code which generates a
4834      definedness test for "expected == old" was removed at r10432 of
4835      this file.
4836   */
4837   if (cas->oldHi == IRTemp_INVALID) {
4838      do_shadow_CAS_single( mce, cas );
4839   } else {
4840      do_shadow_CAS_double( mce, cas );
4841   }
4842}
4843
4844
4845static void do_shadow_CAS_single ( MCEnv* mce, IRCAS* cas )
4846{
4847   IRAtom *vdataLo = NULL, *bdataLo = NULL;
4848   IRAtom *vexpdLo = NULL, *bexpdLo = NULL;
4849   IRAtom *voldLo  = NULL, *boldLo  = NULL;
4850   IRAtom *expd_eq_old = NULL;
4851   IROp   opCasCmpEQ;
4852   Int    elemSzB;
4853   IRType elemTy;
4854   Bool   otrak = MC_(clo_mc_level) >= 3; /* a shorthand */
4855
4856   /* single CAS */
4857   tl_assert(cas->oldHi == IRTemp_INVALID);
4858   tl_assert(cas->expdHi == NULL);
4859   tl_assert(cas->dataHi == NULL);
4860
4861   elemTy = typeOfIRExpr(mce->sb->tyenv, cas->expdLo);
4862   switch (elemTy) {
4863      case Ity_I8:  elemSzB = 1; opCasCmpEQ = Iop_CasCmpEQ8;  break;
4864      case Ity_I16: elemSzB = 2; opCasCmpEQ = Iop_CasCmpEQ16; break;
4865      case Ity_I32: elemSzB = 4; opCasCmpEQ = Iop_CasCmpEQ32; break;
4866      case Ity_I64: elemSzB = 8; opCasCmpEQ = Iop_CasCmpEQ64; break;
4867      default: tl_assert(0); /* IR defn disallows any other types */
4868   }
4869
4870   /* 1. fetch data# (the proposed new value) */
4871   tl_assert(isOriginalAtom(mce, cas->dataLo));
4872   vdataLo
4873      = assignNew('V', mce, elemTy, expr2vbits(mce, cas->dataLo));
4874   tl_assert(isShadowAtom(mce, vdataLo));
4875   if (otrak) {
4876      bdataLo
4877         = assignNew('B', mce, Ity_I32, schemeE(mce, cas->dataLo));
4878      tl_assert(isShadowAtom(mce, bdataLo));
4879   }
4880
4881   /* 2. fetch expected# (what we expect to see at the address) */
4882   tl_assert(isOriginalAtom(mce, cas->expdLo));
4883   vexpdLo
4884      = assignNew('V', mce, elemTy, expr2vbits(mce, cas->expdLo));
4885   tl_assert(isShadowAtom(mce, vexpdLo));
4886   if (otrak) {
4887      bexpdLo
4888         = assignNew('B', mce, Ity_I32, schemeE(mce, cas->expdLo));
4889      tl_assert(isShadowAtom(mce, bexpdLo));
4890   }
4891
4892   /* 3. check definedness of address */
4893   /* 4. fetch old# from shadow memory; this also checks
4894         addressability of the address */
4895   voldLo
4896      = assignNew(
4897           'V', mce, elemTy,
4898           expr2vbits_Load(
4899              mce,
4900              cas->end, elemTy, cas->addr, 0/*Addr bias*/
4901        ));
4902   bind_shadow_tmp_to_orig('V', mce, mkexpr(cas->oldLo), voldLo);
4903   if (otrak) {
4904      boldLo
4905         = assignNew('B', mce, Ity_I32,
4906                     gen_load_b(mce, elemSzB, cas->addr, 0/*addr bias*/));
4907      bind_shadow_tmp_to_orig('B', mce, mkexpr(cas->oldLo), boldLo);
4908   }
4909
4910   /* 5. the CAS itself */
4911   stmt( 'C', mce, IRStmt_CAS(cas) );
4912
4913   /* 6. compute "expected == old" */
4914   /* See COMMENT_ON_CasCmpEQ in this file for background/rationale. */
4915   /* Note that 'C' is kinda faking it; it is indeed a non-shadow
4916      tree, but it's not copied from the input block. */
4917   expd_eq_old
4918      = assignNew('C', mce, Ity_I1,
4919                  binop(opCasCmpEQ, cas->expdLo, mkexpr(cas->oldLo)));
4920
4921   /* 7. if "expected == old"
4922            store data# to shadow memory */
4923   do_shadow_Store( mce, cas->end, cas->addr, 0/*bias*/,
4924                    NULL/*data*/, vdataLo/*vdata*/,
4925                    expd_eq_old/*guard for store*/ );
4926   if (otrak) {
4927      gen_store_b( mce, elemSzB, cas->addr, 0/*offset*/,
4928                   bdataLo/*bdata*/,
4929                   expd_eq_old/*guard for store*/ );
4930   }
4931}
4932
4933
4934static void do_shadow_CAS_double ( MCEnv* mce, IRCAS* cas )
4935{
4936   IRAtom *vdataHi = NULL, *bdataHi = NULL;
4937   IRAtom *vdataLo = NULL, *bdataLo = NULL;
4938   IRAtom *vexpdHi = NULL, *bexpdHi = NULL;
4939   IRAtom *vexpdLo = NULL, *bexpdLo = NULL;
4940   IRAtom *voldHi  = NULL, *boldHi  = NULL;
4941   IRAtom *voldLo  = NULL, *boldLo  = NULL;
4942   IRAtom *xHi = NULL, *xLo = NULL, *xHL = NULL;
4943   IRAtom *expd_eq_old = NULL, *zero = NULL;
4944   IROp   opCasCmpEQ, opOr, opXor;
4945   Int    elemSzB, memOffsLo, memOffsHi;
4946   IRType elemTy;
4947   Bool   otrak = MC_(clo_mc_level) >= 3; /* a shorthand */
4948
4949   /* double CAS */
4950   tl_assert(cas->oldHi != IRTemp_INVALID);
4951   tl_assert(cas->expdHi != NULL);
4952   tl_assert(cas->dataHi != NULL);
4953
4954   elemTy = typeOfIRExpr(mce->sb->tyenv, cas->expdLo);
4955   switch (elemTy) {
4956      case Ity_I8:
4957         opCasCmpEQ = Iop_CasCmpEQ8; opOr = Iop_Or8; opXor = Iop_Xor8;
4958         elemSzB = 1; zero = mkU8(0);
4959         break;
4960      case Ity_I16:
4961         opCasCmpEQ = Iop_CasCmpEQ16; opOr = Iop_Or16; opXor = Iop_Xor16;
4962         elemSzB = 2; zero = mkU16(0);
4963         break;
4964      case Ity_I32:
4965         opCasCmpEQ = Iop_CasCmpEQ32; opOr = Iop_Or32; opXor = Iop_Xor32;
4966         elemSzB = 4; zero = mkU32(0);
4967         break;
4968      case Ity_I64:
4969         opCasCmpEQ = Iop_CasCmpEQ64; opOr = Iop_Or64; opXor = Iop_Xor64;
4970         elemSzB = 8; zero = mkU64(0);
4971         break;
4972      default:
4973         tl_assert(0); /* IR defn disallows any other types */
4974   }
4975
4976   /* 1. fetch data# (the proposed new value) */
4977   tl_assert(isOriginalAtom(mce, cas->dataHi));
4978   tl_assert(isOriginalAtom(mce, cas->dataLo));
4979   vdataHi
4980      = assignNew('V', mce, elemTy, expr2vbits(mce, cas->dataHi));
4981   vdataLo
4982      = assignNew('V', mce, elemTy, expr2vbits(mce, cas->dataLo));
4983   tl_assert(isShadowAtom(mce, vdataHi));
4984   tl_assert(isShadowAtom(mce, vdataLo));
4985   if (otrak) {
4986      bdataHi
4987         = assignNew('B', mce, Ity_I32, schemeE(mce, cas->dataHi));
4988      bdataLo
4989         = assignNew('B', mce, Ity_I32, schemeE(mce, cas->dataLo));
4990      tl_assert(isShadowAtom(mce, bdataHi));
4991      tl_assert(isShadowAtom(mce, bdataLo));
4992   }
4993
4994   /* 2. fetch expected# (what we expect to see at the address) */
4995   tl_assert(isOriginalAtom(mce, cas->expdHi));
4996   tl_assert(isOriginalAtom(mce, cas->expdLo));
4997   vexpdHi
4998      = assignNew('V', mce, elemTy, expr2vbits(mce, cas->expdHi));
4999   vexpdLo
5000      = assignNew('V', mce, elemTy, expr2vbits(mce, cas->expdLo));
5001   tl_assert(isShadowAtom(mce, vexpdHi));
5002   tl_assert(isShadowAtom(mce, vexpdLo));
5003   if (otrak) {
5004      bexpdHi
5005         = assignNew('B', mce, Ity_I32, schemeE(mce, cas->expdHi));
5006      bexpdLo
5007         = assignNew('B', mce, Ity_I32, schemeE(mce, cas->expdLo));
5008      tl_assert(isShadowAtom(mce, bexpdHi));
5009      tl_assert(isShadowAtom(mce, bexpdLo));
5010   }
5011
5012   /* 3. check definedness of address */
5013   /* 4. fetch old# from shadow memory; this also checks
5014         addressability of the address */
5015   if (cas->end == Iend_LE) {
5016      memOffsLo = 0;
5017      memOffsHi = elemSzB;
5018   } else {
5019      tl_assert(cas->end == Iend_BE);
5020      memOffsLo = elemSzB;
5021      memOffsHi = 0;
5022   }
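   /* So, for example, a little-endian DCAS on two 32-bit elements
      shadows the Lo element at addr+0 .. addr+3 and the Hi element at
      addr+4 .. addr+7; big-endian swaps the two. */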
5023   voldHi
5024      = assignNew(
5025           'V', mce, elemTy,
5026           expr2vbits_Load(
5027              mce,
5028              cas->end, elemTy, cas->addr, memOffsHi/*Addr bias*/
5029        ));
5030   voldLo
5031      = assignNew(
5032           'V', mce, elemTy,
5033           expr2vbits_Load(
5034              mce,
5035              cas->end, elemTy, cas->addr, memOffsLo/*Addr bias*/
5036        ));
5037   bind_shadow_tmp_to_orig('V', mce, mkexpr(cas->oldHi), voldHi);
5038   bind_shadow_tmp_to_orig('V', mce, mkexpr(cas->oldLo), voldLo);
5039   if (otrak) {
5040      boldHi
5041         = assignNew('B', mce, Ity_I32,
5042                     gen_load_b(mce, elemSzB, cas->addr,
5043                                memOffsHi/*addr bias*/));
5044      boldLo
5045         = assignNew('B', mce, Ity_I32,
5046                     gen_load_b(mce, elemSzB, cas->addr,
5047                                memOffsLo/*addr bias*/));
5048      bind_shadow_tmp_to_orig('B', mce, mkexpr(cas->oldHi), boldHi);
5049      bind_shadow_tmp_to_orig('B', mce, mkexpr(cas->oldLo), boldLo);
5050   }
5051
5052   /* 5. the CAS itself */
5053   stmt( 'C', mce, IRStmt_CAS(cas) );
5054
5055   /* 6. compute "expected == old" */
5056   /* See COMMENT_ON_CasCmpEQ in this file for background/rationale. */
5057   /* Note that 'C' is kinda faking it; it is indeed a non-shadow
5058      tree, but it's not copied from the input block. */
5059   /*
5060      xHi = oldHi ^ expdHi;
5061      xLo = oldLo ^ expdLo;
5062      xHL = xHi | xLo;
5063      expd_eq_old = xHL == 0;
5064   */
5065   xHi = assignNew('C', mce, elemTy,
5066                   binop(opXor, cas->expdHi, mkexpr(cas->oldHi)));
5067   xLo = assignNew('C', mce, elemTy,
5068                   binop(opXor, cas->expdLo, mkexpr(cas->oldLo)));
5069   xHL = assignNew('C', mce, elemTy,
5070                   binop(opOr, xHi, xLo));
5071   expd_eq_old
5072      = assignNew('C', mce, Ity_I1,
5073                  binop(opCasCmpEQ, xHL, zero));
5074
5075   /* 7. if "expected == old"
5076            store data# to shadow memory */
5077   do_shadow_Store( mce, cas->end, cas->addr, memOffsHi/*bias*/,
5078                    NULL/*data*/, vdataHi/*vdata*/,
5079                    expd_eq_old/*guard for store*/ );
5080   do_shadow_Store( mce, cas->end, cas->addr, memOffsLo/*bias*/,
5081                    NULL/*data*/, vdataLo/*vdata*/,
5082                    expd_eq_old/*guard for store*/ );
5083   if (otrak) {
5084      gen_store_b( mce, elemSzB, cas->addr, memOffsHi/*offset*/,
5085                   bdataHi/*bdata*/,
5086                   expd_eq_old/*guard for store*/ );
5087      gen_store_b( mce, elemSzB, cas->addr, memOffsLo/*offset*/,
5088                   bdataLo/*bdata*/,
5089                   expd_eq_old/*guard for store*/ );
5090   }
5091}
5092
5093
5094/* ------ Dealing with LL/SC (not difficult) ------ */
5095
5096static void do_shadow_LLSC ( MCEnv*    mce,
5097                             IREndness stEnd,
5098                             IRTemp    stResult,
5099                             IRExpr*   stAddr,
5100                             IRExpr*   stStoredata )
5101{
5102   /* In short: treat a load-linked like a normal load followed by an
5103      assignment of the loaded (shadow) data to the result temporary.
5104      Treat a store-conditional like a normal store, and mark the
5105      result temporary as defined. */
5106   IRType resTy  = typeOfIRTemp(mce->sb->tyenv, stResult);
5107   IRTemp resTmp = findShadowTmpV(mce, stResult);
5108
5109   tl_assert(isIRAtom(stAddr));
5110   if (stStoredata)
5111      tl_assert(isIRAtom(stStoredata));
5112
5113   if (stStoredata == NULL) {
5114      /* Load Linked */
5115      /* Just treat this as a normal load, followed by an assignment of
5116         the value to .result. */
5117      /* Stay sane */
5118      tl_assert(resTy == Ity_I64 || resTy == Ity_I32
5119                || resTy == Ity_I16 || resTy == Ity_I8);
5120      assign( 'V', mce, resTmp,
5121                   expr2vbits_Load(
5122                      mce, stEnd, resTy, stAddr, 0/*addr bias*/));
5123   } else {
5124      /* Store Conditional */
5125      /* Stay sane */
5126      IRType dataTy = typeOfIRExpr(mce->sb->tyenv,
5127                                   stStoredata);
5128      tl_assert(dataTy == Ity_I64 || dataTy == Ity_I32
5129                || dataTy == Ity_I16 || dataTy == Ity_I8);
5130      do_shadow_Store( mce, stEnd,
5131                            stAddr, 0/* addr bias */,
5132                            stStoredata,
5133                            NULL /* shadow data */,
5134                            NULL/*guard*/ );
5135      /* This is a store conditional, so it writes to .result a value
5136         indicating whether or not the store succeeded.  Just claim
5137         this value is always defined.  In the PowerPC interpretation
5138         of store-conditional, definedness of the success indication
5139         depends on whether the address of the store matches the
5140         reservation address.  But we can't tell that here (and
5141         anyway, we're not being PowerPC-specific).  At least we are
5142         guaranteed that the definedness of the store address, and its
5143         addressability, will be checked as per normal.  So it seems
5144         pretty safe to just say that the success indication is always
5145         defined.
5146
5147         In schemeS, for origin tracking, we must correspondingly set
5148         a no-origin value for the origin shadow of .result.
5149      */
5150      tl_assert(resTy == Ity_I1);
5151      assign( 'V', mce, resTmp, definedOfType(resTy) );
5152   }
5153}
5154
5155
5156/*------------------------------------------------------------*/
5157/*--- Memcheck main                                        ---*/
5158/*------------------------------------------------------------*/
5159
5160static void schemeS ( MCEnv* mce, IRStmt* st );
5161
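/* Does 'at' denote one of a small set of suspicious-looking literal
   values?  These appear to be the magic constants used by
   word-at-a-time byte-scanning idioms (strlen-style loops and the
   like), which deliberately operate on partially defined words.  If
   any such literal is spotted, MC_(instrument) below enables the more
   expensive, more precise interpretation for the whole block (see
   mce.bogusLiterals). */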
5162static Bool isBogusAtom ( IRAtom* at )
5163{
5164   ULong n = 0;
5165   IRConst* con;
5166   tl_assert(isIRAtom(at));
5167   if (at->tag == Iex_RdTmp)
5168      return False;
5169   tl_assert(at->tag == Iex_Const);
5170   con = at->Iex.Const.con;
5171   switch (con->tag) {
5172      case Ico_U1:   return False;
5173      case Ico_U8:   n = (ULong)con->Ico.U8; break;
5174      case Ico_U16:  n = (ULong)con->Ico.U16; break;
5175      case Ico_U32:  n = (ULong)con->Ico.U32; break;
5176      case Ico_U64:  n = (ULong)con->Ico.U64; break;
5177      case Ico_F64:  return False;
5178      case Ico_F32i: return False;
5179      case Ico_F64i: return False;
5180      case Ico_V128: return False;
5181      default: ppIRExpr(at); tl_assert(0);
5182   }
5183   /* VG_(printf)("%llx\n", n); */
5184   return (/*32*/    n == 0xFEFEFEFFULL
5185           /*32*/ || n == 0x80808080ULL
5186           /*32*/ || n == 0x7F7F7F7FULL
5187           /*64*/ || n == 0xFFFFFFFFFEFEFEFFULL
5188           /*64*/ || n == 0xFEFEFEFEFEFEFEFFULL
5189           /*64*/ || n == 0x0000000000008080ULL
5190           /*64*/ || n == 0x8080808080808080ULL
5191           /*64*/ || n == 0x0101010101010101ULL
5192          );
5193}
5194
5195static Bool checkForBogusLiterals ( /*FLAT*/ IRStmt* st )
5196{
5197   Int      i;
5198   IRExpr*  e;
5199   IRDirty* d;
5200   IRCAS*   cas;
5201   switch (st->tag) {
5202      case Ist_WrTmp:
5203         e = st->Ist.WrTmp.data;
5204         switch (e->tag) {
5205            case Iex_Get:
5206            case Iex_RdTmp:
5207               return False;
5208            case Iex_Const:
5209               return isBogusAtom(e);
5210            case Iex_Unop:
5211               return isBogusAtom(e->Iex.Unop.arg);
5212            case Iex_GetI:
5213               return isBogusAtom(e->Iex.GetI.ix);
5214            case Iex_Binop:
5215               return isBogusAtom(e->Iex.Binop.arg1)
5216                      || isBogusAtom(e->Iex.Binop.arg2);
5217            case Iex_Triop:
5218               return isBogusAtom(e->Iex.Triop.details->arg1)
5219                      || isBogusAtom(e->Iex.Triop.details->arg2)
5220                      || isBogusAtom(e->Iex.Triop.details->arg3);
5221            case Iex_Qop:
5222               return isBogusAtom(e->Iex.Qop.details->arg1)
5223                      || isBogusAtom(e->Iex.Qop.details->arg2)
5224                      || isBogusAtom(e->Iex.Qop.details->arg3)
5225                      || isBogusAtom(e->Iex.Qop.details->arg4);
5226            case Iex_Mux0X:
5227               return isBogusAtom(e->Iex.Mux0X.cond)
5228                      || isBogusAtom(e->Iex.Mux0X.expr0)
5229                      || isBogusAtom(e->Iex.Mux0X.exprX);
5230            case Iex_Load:
5231               return isBogusAtom(e->Iex.Load.addr);
5232            case Iex_CCall:
5233               for (i = 0; e->Iex.CCall.args[i]; i++)
5234                  if (isBogusAtom(e->Iex.CCall.args[i]))
5235                     return True;
5236               return False;
5237            default:
5238               goto unhandled;
5239         }
5240      case Ist_Dirty:
5241         d = st->Ist.Dirty.details;
5242         for (i = 0; d->args[i]; i++)
5243            if (isBogusAtom(d->args[i]))
5244               return True;
5245         if (d->guard && isBogusAtom(d->guard))
5246            return True;
5247         if (d->mAddr && isBogusAtom(d->mAddr))
5248            return True;
5249         return False;
5250      case Ist_Put:
5251         return isBogusAtom(st->Ist.Put.data);
5252      case Ist_PutI:
5253         return isBogusAtom(st->Ist.PutI.details->ix)
5254                || isBogusAtom(st->Ist.PutI.details->data);
5255      case Ist_Store:
5256         return isBogusAtom(st->Ist.Store.addr)
5257                || isBogusAtom(st->Ist.Store.data);
5258      case Ist_Exit:
5259         return isBogusAtom(st->Ist.Exit.guard);
5260      case Ist_AbiHint:
5261         return isBogusAtom(st->Ist.AbiHint.base)
5262                || isBogusAtom(st->Ist.AbiHint.nia);
5263      case Ist_NoOp:
5264      case Ist_IMark:
5265      case Ist_MBE:
5266         return False;
5267      case Ist_CAS:
5268         cas = st->Ist.CAS.details;
5269         return isBogusAtom(cas->addr)
5270                || (cas->expdHi ? isBogusAtom(cas->expdHi) : False)
5271                || isBogusAtom(cas->expdLo)
5272                || (cas->dataHi ? isBogusAtom(cas->dataHi) : False)
5273                || isBogusAtom(cas->dataLo);
5274      case Ist_LLSC:
5275         return isBogusAtom(st->Ist.LLSC.addr)
5276                || (st->Ist.LLSC.storedata
5277                       ? isBogusAtom(st->Ist.LLSC.storedata)
5278                       : False);
5279      default:
5280      unhandled:
5281         ppIRStmt(st);
5282         VG_(tool_panic)("checkForBogusLiterals");
5283   }
5284}
5285
5286
5287IRSB* MC_(instrument) ( VgCallbackClosure* closure,
5288                        IRSB* sb_in,
5289                        VexGuestLayout* layout,
5290                        VexGuestExtents* vge,
5291                        IRType gWordTy, IRType hWordTy )
5292{
5293   Bool    verboze = 0||False;
5294   Bool    bogus;
5295   Int     i, j, first_stmt;
5296   IRStmt* st;
5297   MCEnv   mce;
5298   IRSB*   sb_out;
5299
5300   if (gWordTy != hWordTy) {
5301      /* We don't currently support this case. */
5302      VG_(tool_panic)("host/guest word size mismatch");
5303   }
5304
5305   /* Check we're not completely nuts */
5306   tl_assert(sizeof(UWord)  == sizeof(void*));
5307   tl_assert(sizeof(Word)   == sizeof(void*));
5308   tl_assert(sizeof(Addr)   == sizeof(void*));
5309   tl_assert(sizeof(ULong)  == 8);
5310   tl_assert(sizeof(Long)   == 8);
5311   tl_assert(sizeof(Addr64) == 8);
5312   tl_assert(sizeof(UInt)   == 4);
5313   tl_assert(sizeof(Int)    == 4);
5314
5315   tl_assert(MC_(clo_mc_level) >= 1 && MC_(clo_mc_level) <= 3);
5316
5317   /* Set up SB */
5318   sb_out = deepCopyIRSBExceptStmts(sb_in);
5319
5320   /* Set up the running environment.  Both .sb and .tmpMap are
5321      modified as we go along.  Note that tmps are added to both
5322      .sb->tyenv and .tmpMap together, so the valid index-set for
5323      those two arrays should always be identical. */
5324   VG_(memset)(&mce, 0, sizeof(mce));
5325   mce.sb             = sb_out;
5326   mce.trace          = verboze;
5327   mce.layout         = layout;
5328   mce.hWordTy        = hWordTy;
5329   mce.bogusLiterals  = False;
5330
5331   /* Do expensive interpretation for Iop_Add32 and Iop_Add64 on
5332      Darwin.  10.7 is mostly built with LLVM, which uses these for
5333      bitfield inserts, and we get a lot of false errors if the cheap
5334      interpretation is used, alas.  Could solve this much better if
5335      we knew which of such adds came from x86/amd64 LEA instructions,
5336      since these are the only ones really needing the expensive
5337      interpretation, but that would require some way to tag them in
5338      the _toIR.c front ends, which is a lot of faffing around.  So
5339      for now just use the slow and blunt-instrument solution. */
5340   mce.useLLVMworkarounds = False;
5341#  if defined(VGO_darwin)
5342   mce.useLLVMworkarounds = True;
5343#  endif
5344
5345   mce.tmpMap = VG_(newXA)( VG_(malloc), "mc.MC_(instrument).1", VG_(free),
5346                            sizeof(TempMapEnt));
5347   for (i = 0; i < sb_in->tyenv->types_used; i++) {
5348      TempMapEnt ent;
5349      ent.kind    = Orig;
5350      ent.shadowV = IRTemp_INVALID;
5351      ent.shadowB = IRTemp_INVALID;
5352      VG_(addToXA)( mce.tmpMap, &ent );
5353   }
5354   tl_assert( VG_(sizeXA)( mce.tmpMap ) == sb_in->tyenv->types_used );
5355
5356   /* Make a preliminary inspection of the statements, to see if there
5357      are any dodgy-looking literals.  If there are, we generate
5358      extra-detailed (hence extra-expensive) instrumentation in
5359      places.  Scan the whole bb even if dodginess is found earlier,
5360      so that the flatness assertion is applied to all stmts. */
5361
5362   bogus = False;
5363
5364   for (i = 0; i < sb_in->stmts_used; i++) {
5365
5366      st = sb_in->stmts[i];
5367      tl_assert(st);
5368      tl_assert(isFlatIRStmt(st));
5369
5370      if (!bogus) {
5371         bogus = checkForBogusLiterals(st);
5372         if (0 && bogus) {
5373            VG_(printf)("bogus: ");
5374            ppIRStmt(st);
5375            VG_(printf)("\n");
5376         }
5377      }
5378
5379   }
5380
5381   mce.bogusLiterals = bogus;
5382
5383   /* Copy verbatim any IR preamble preceding the first IMark */
5384
5385   tl_assert(mce.sb == sb_out);
5386   tl_assert(mce.sb != sb_in);
5387
5388   i = 0;
5389   while (i < sb_in->stmts_used && sb_in->stmts[i]->tag != Ist_IMark) {
5390
5391      st = sb_in->stmts[i];
5392      tl_assert(st);
5393      tl_assert(isFlatIRStmt(st));
5394
5395      stmt( 'C', &mce, sb_in->stmts[i] );
5396      i++;
5397   }
5398
5399   /* Nasty problem.  IR optimisation of the pre-instrumented IR may
5400      cause the IR following the preamble to contain references to IR
5401      temporaries defined in the preamble.  Because the preamble isn't
5402      instrumented, these temporaries don't have any shadows.
5403      Nevertheless uses of them following the preamble will cause
5404      memcheck to generate references to their shadows.  End effect is
5405      to cause IR sanity check failures, due to references to
5406      non-existent shadows.  This is only evident for the complex
5407      preambles used for function wrapping on TOC-afflicted platforms
5408      (ppc64-linux).
5409
5410      The following loop therefore scans the preamble looking for
5411      assignments to temporaries.  For each one found it creates an
5412      assignment to the corresponding (V) shadow temp, marking it as
5413      'defined'.  This is the same resulting IR as if the main
5414      instrumentation loop before had been applied to the statement
5415      'tmp = CONSTANT'.
5416
5417      Similarly, if origin tracking is enabled, we must generate an
5418      assignment for the corresponding origin (B) shadow, claiming
5419      no-origin, as appropriate for a defined value.
5420   */
5421   for (j = 0; j < i; j++) {
5422      if (sb_in->stmts[j]->tag == Ist_WrTmp) {
5423         /* findShadowTmpV checks its arg is an original tmp;
5424            no need to assert that here. */
5425         IRTemp tmp_o = sb_in->stmts[j]->Ist.WrTmp.tmp;
5426         IRTemp tmp_v = findShadowTmpV(&mce, tmp_o);
5427         IRType ty_v  = typeOfIRTemp(sb_out->tyenv, tmp_v);
5428         assign( 'V', &mce, tmp_v, definedOfType( ty_v ) );
5429         if (MC_(clo_mc_level) == 3) {
5430            IRTemp tmp_b = findShadowTmpB(&mce, tmp_o);
5431            tl_assert(typeOfIRTemp(sb_out->tyenv, tmp_b) == Ity_I32);
5432            assign( 'B', &mce, tmp_b, mkU32(0)/* UNKNOWN ORIGIN */);
5433         }
5434         if (0) {
5435            VG_(printf)("create shadow tmp(s) for preamble tmp [%d] ty ", j);
5436            ppIRType( ty_v );
5437            VG_(printf)("\n");
5438         }
5439      }
5440   }
5441
5442   /* Iterate over the remaining stmts to generate instrumentation. */
5443
5444   tl_assert(sb_in->stmts_used > 0);
5445   tl_assert(i >= 0);
5446   tl_assert(i < sb_in->stmts_used);
5447   tl_assert(sb_in->stmts[i]->tag == Ist_IMark);
5448
5449   for (/* use current i*/; i < sb_in->stmts_used; i++) {
5450
5451      st = sb_in->stmts[i];
5452      first_stmt = sb_out->stmts_used;
5453
5454      if (verboze) {
5455         VG_(printf)("\n");
5456         ppIRStmt(st);
5457         VG_(printf)("\n");
5458      }
5459
5460      if (MC_(clo_mc_level) == 3) {
5461         /* See comments on case Ist_CAS below. */
5462         if (st->tag != Ist_CAS)
5463            schemeS( &mce, st );
5464      }
5465
5466      /* Generate instrumentation code for each stmt ... */
5467
5468      switch (st->tag) {
5469
5470         case Ist_WrTmp:
5471            assign( 'V', &mce, findShadowTmpV(&mce, st->Ist.WrTmp.tmp),
5472                               expr2vbits( &mce, st->Ist.WrTmp.data) );
5473            break;
5474
5475         case Ist_Put:
5476            do_shadow_PUT( &mce,
5477                           st->Ist.Put.offset,
5478                           st->Ist.Put.data,
5479                           NULL /* shadow atom */, NULL /* guard */ );
5480            break;
5481
5482         case Ist_PutI:
5483            do_shadow_PUTI( &mce, st->Ist.PutI.details);
5484            break;
5485
5486         case Ist_Store:
5487            do_shadow_Store( &mce, st->Ist.Store.end,
5488                                   st->Ist.Store.addr, 0/* addr bias */,
5489                                   st->Ist.Store.data,
5490                                   NULL /* shadow data */,
5491                                   NULL/*guard*/ );
5492            break;
5493
5494         case Ist_Exit:
5495            complainIfUndefined( &mce, st->Ist.Exit.guard, NULL );
5496            break;
5497
5498         case Ist_IMark:
5499            break;
5500
5501         case Ist_NoOp:
5502         case Ist_MBE:
5503            break;
5504
5505         case Ist_Dirty:
5506            do_shadow_Dirty( &mce, st->Ist.Dirty.details );
5507            break;
5508
5509         case Ist_AbiHint:
5510            do_AbiHint( &mce, st->Ist.AbiHint.base,
5511                              st->Ist.AbiHint.len,
5512                              st->Ist.AbiHint.nia );
5513            break;
5514
5515         case Ist_CAS:
5516            do_shadow_CAS( &mce, st->Ist.CAS.details );
5517            /* Note, do_shadow_CAS copies the CAS itself to the output
5518               block, because it needs to add instrumentation both
5519               before and after it.  Hence skip the copy below.  Also
5520               skip the origin-tracking stuff (call to schemeS) above,
5521               since that's all tangled up with it too; do_shadow_CAS
5522               does it all. */
5523            break;
5524
5525         case Ist_LLSC:
5526            do_shadow_LLSC( &mce,
5527                            st->Ist.LLSC.end,
5528                            st->Ist.LLSC.result,
5529                            st->Ist.LLSC.addr,
5530                            st->Ist.LLSC.storedata );
5531            break;
5532
5533         default:
5534            VG_(printf)("\n");
5535            ppIRStmt(st);
5536            VG_(printf)("\n");
5537            VG_(tool_panic)("memcheck: unhandled IRStmt");
5538
5539      } /* switch (st->tag) */
5540
5541      if (0 && verboze) {
5542         for (j = first_stmt; j < sb_out->stmts_used; j++) {
5543            VG_(printf)("   ");
5544            ppIRStmt(sb_out->stmts[j]);
5545            VG_(printf)("\n");
5546         }
5547         VG_(printf)("\n");
5548      }
5549
5550      /* ... and finally copy the stmt itself to the output.  Except,
5551         skip the copy of IRCASs; see comments on case Ist_CAS
5552         above. */
5553      if (st->tag != Ist_CAS)
5554         stmt('C', &mce, st);
5555   }
5556
5557   /* Now we need to complain if the jump target is undefined. */
5558   first_stmt = sb_out->stmts_used;
5559
5560   if (verboze) {
5561      VG_(printf)("sb_in->next = ");
5562      ppIRExpr(sb_in->next);
5563      VG_(printf)("\n\n");
5564   }
5565
5566   complainIfUndefined( &mce, sb_in->next, NULL );
5567
5568   if (0 && verboze) {
5569      for (j = first_stmt; j < sb_out->stmts_used; j++) {
5570         VG_(printf)("   ");
5571         ppIRStmt(sb_out->stmts[j]);
5572         VG_(printf)("\n");
5573      }
5574      VG_(printf)("\n");
5575   }
5576
5577   /* If this fails, there's been some serious snafu with tmp management,
5578      which should be investigated. */
5579   tl_assert( VG_(sizeXA)( mce.tmpMap ) == mce.sb->tyenv->types_used );
5580   VG_(deleteXA)( mce.tmpMap );
5581
5582   tl_assert(mce.sb == sb_out);
5583   return sb_out;
5584}
5585
5586/*------------------------------------------------------------*/
5587/*--- Post-tree-build final tidying                        ---*/
5588/*------------------------------------------------------------*/
5589
5590/* This exploits the observation that Memcheck often produces
5591   repeated conditional calls of the form
5592
5593   Dirty G MC_(helperc_value_check0/1/4/8_fail)(UInt otag)
5594
5595   with the same guard expression G guarding the same helper call.
5596   The second and subsequent calls are redundant.  This usually
5597   results from instrumentation of guest code containing multiple
5598   memory references at different constant offsets from the same base
5599   register.  After optimisation of the instrumentation, you get a
5600   test for the definedness of the base register for each memory
5601   reference, which is kinda pointless.  MC_(final_tidy) therefore
5602   looks for such repeated calls and removes all but the first. */
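
/* For example, a guest sequence that loads from two different constant
   offsets off the same base register gives rise to two conditional
   calls to the same MC_(helperc_value_checkN_fail) helper, both guarded
   by the same "is the base register undefined?" test; the second call
   is redundant and is overwritten with a NoOp below. */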
5603
5604/* A struct for recording which (helper, guard) pairs we have already
5605   seen. */
5606typedef
5607   struct { void* entry; IRExpr* guard; }
5608   Pair;
5609
5610/* Return True if e1 and e2 definitely denote the same value (used to
5611   compare guards).  Return False if unknown; False is the safe
5612   answer.  Since guest registers and guest memory do not have the
5613   SSA property we must return False if any Gets or Loads appear in
5614   the expression. */
5615
5616static Bool sameIRValue ( IRExpr* e1, IRExpr* e2 )
5617{
5618   if (e1->tag != e2->tag)
5619      return False;
5620   switch (e1->tag) {
5621      case Iex_Const:
5622         return eqIRConst( e1->Iex.Const.con, e2->Iex.Const.con );
5623      case Iex_Binop:
5624         return e1->Iex.Binop.op == e2->Iex.Binop.op
5625                && sameIRValue(e1->Iex.Binop.arg1, e2->Iex.Binop.arg1)
5626                && sameIRValue(e1->Iex.Binop.arg2, e2->Iex.Binop.arg2);
5627      case Iex_Unop:
5628         return e1->Iex.Unop.op == e2->Iex.Unop.op
5629                && sameIRValue(e1->Iex.Unop.arg, e2->Iex.Unop.arg);
5630      case Iex_RdTmp:
5631         return e1->Iex.RdTmp.tmp == e2->Iex.RdTmp.tmp;
5632      case Iex_Mux0X:
5633         return sameIRValue( e1->Iex.Mux0X.cond, e2->Iex.Mux0X.cond )
5634                && sameIRValue( e1->Iex.Mux0X.expr0, e2->Iex.Mux0X.expr0 )
5635                && sameIRValue( e1->Iex.Mux0X.exprX, e2->Iex.Mux0X.exprX );
5636      case Iex_Qop:
5637      case Iex_Triop:
5638      case Iex_CCall:
5639         /* be lazy.  Could define equality for these, but they never
5640            appear to be used. */
5641         return False;
5642      case Iex_Get:
5643      case Iex_GetI:
5644      case Iex_Load:
5645         /* be conservative - these may not give the same value each
5646            time */
5647         return False;
5648      case Iex_Binder:
5649         /* should never see this */
5650         /* fallthrough */
5651      default:
5652         VG_(printf)("mc_translate.c: sameIRValue: unhandled: ");
5653         ppIRExpr(e1);
5654         VG_(tool_panic)("memcheck:sameIRValue");
5655         return False;
5656   }
5657}
5658
5659/* See if 'pairs' already has an entry for (entry, guard).  Return
5660   True if so.  If not, add an entry. */
5661
5662static
5663Bool check_or_add ( XArray* /*of Pair*/ pairs, IRExpr* guard, void* entry )
5664{
5665   Pair  p;
5666   Pair* pp;
5667   Int   i, n = VG_(sizeXA)( pairs );
5668   for (i = 0; i < n; i++) {
5669      pp = VG_(indexXA)( pairs, i );
5670      if (pp->entry == entry && sameIRValue(pp->guard, guard))
5671         return True;
5672   }
5673   p.guard = guard;
5674   p.entry = entry;
5675   VG_(addToXA)( pairs, &p );
5676   return False;
5677}
5678
5679static Bool is_helperc_value_checkN_fail ( HChar* name )
5680{
5681   return
5682      0==VG_(strcmp)(name, "MC_(helperc_value_check0_fail_no_o)")
5683      || 0==VG_(strcmp)(name, "MC_(helperc_value_check1_fail_no_o)")
5684      || 0==VG_(strcmp)(name, "MC_(helperc_value_check4_fail_no_o)")
5685      || 0==VG_(strcmp)(name, "MC_(helperc_value_check8_fail_no_o)")
5686      || 0==VG_(strcmp)(name, "MC_(helperc_value_check0_fail_w_o)")
5687      || 0==VG_(strcmp)(name, "MC_(helperc_value_check1_fail_w_o)")
5688      || 0==VG_(strcmp)(name, "MC_(helperc_value_check4_fail_w_o)")
5689      || 0==VG_(strcmp)(name, "MC_(helperc_value_check8_fail_w_o)");
5690}
5691
5692IRSB* MC_(final_tidy) ( IRSB* sb_in )
5693{
5694   Int i;
5695   IRStmt*   st;
5696   IRDirty*  di;
5697   IRExpr*   guard;
5698   IRCallee* cee;
5699   Bool      alreadyPresent;
5700   XArray*   pairs = VG_(newXA)( VG_(malloc), "mc.ft.1",
5701                                 VG_(free), sizeof(Pair) );
5702   /* Scan forwards through the statements.  Each time a call to one
5703      of the relevant helpers is seen, check if we have made a
5704      previous call to the same helper using the same guard
5705      expression, and if so, delete the call. */
5706   for (i = 0; i < sb_in->stmts_used; i++) {
5707      st = sb_in->stmts[i];
5708      tl_assert(st);
5709      if (st->tag != Ist_Dirty)
5710         continue;
5711      di = st->Ist.Dirty.details;
5712      guard = di->guard;
5713      if (!guard)
5714         continue;
5715      if (0) { ppIRExpr(guard); VG_(printf)("\n"); }
5716      cee = di->cee;
5717      if (!is_helperc_value_checkN_fail( cee->name ))
5718         continue;
5719      /* Ok, we have a call to helperc_value_check0/1/4/8_fail with
5720         guard 'guard'.  Check if we have already seen a call to this
5721         function with the same guard.  If so, delete it.  If not,
5722         add it to the set of calls we do know about. */
5723      alreadyPresent = check_or_add( pairs, guard, cee->addr );
5724      if (alreadyPresent) {
5725         sb_in->stmts[i] = IRStmt_NoOp();
5726         if (0) VG_(printf)("XX\n");
5727      }
5728   }
5729   VG_(deleteXA)( pairs );
5730   return sb_in;
5731}
5732
5733
5734/*------------------------------------------------------------*/
5735/*--- Origin tracking stuff                                ---*/
5736/*------------------------------------------------------------*/
5737
5738/* Almost identical to findShadowTmpV. */
5739static IRTemp findShadowTmpB ( MCEnv* mce, IRTemp orig )
5740{
5741   TempMapEnt* ent;
5742   /* VG_(indexXA) range-checks 'orig', hence no need to check
5743      here. */
5744   ent = (TempMapEnt*)VG_(indexXA)( mce->tmpMap, (Word)orig );
5745   tl_assert(ent->kind == Orig);
5746   if (ent->shadowB == IRTemp_INVALID) {
5747      IRTemp tmpB
5748        = newTemp( mce, Ity_I32, BSh );
5749      /* newTemp may cause mce->tmpMap to resize, hence previous results
5750         from VG_(indexXA) are invalid. */
5751      ent = (TempMapEnt*)VG_(indexXA)( mce->tmpMap, (Word)orig );
5752      tl_assert(ent->kind == Orig);
5753      tl_assert(ent->shadowB == IRTemp_INVALID);
5754      ent->shadowB = tmpB;
5755   }
5756   return ent->shadowB;
5757}
5758
5759static IRAtom* gen_maxU32 ( MCEnv* mce, IRAtom* b1, IRAtom* b2 )
5760{
5761   return assignNew( 'B', mce, Ity_I32, binop(Iop_Max32U, b1, b2) );
5762}
5763
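/* Generate IR to fetch the origin tag ("B" shadow) for the szB bytes at
   baseaddr+offset, by calling the matching MC_(helperc_b_loadN) helper.
   The result is always an Ity_I32 otag; on a 64-bit host the helper's
   word-sized result is narrowed back down to 32 bits. */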
5764static IRAtom* gen_load_b ( MCEnv* mce, Int szB,
5765                            IRAtom* baseaddr, Int offset )
5766{
5767   void*    hFun;
5768   HChar*   hName;
5769   IRTemp   bTmp;
5770   IRDirty* di;
5771   IRType   aTy   = typeOfIRExpr( mce->sb->tyenv, baseaddr );
5772   IROp     opAdd = aTy == Ity_I32 ? Iop_Add32 : Iop_Add64;
5773   IRAtom*  ea    = baseaddr;
5774   if (offset != 0) {
5775      IRAtom* off = aTy == Ity_I32 ? mkU32( offset )
5776                                   : mkU64( (Long)(Int)offset );
5777      ea = assignNew( 'B', mce, aTy, binop(opAdd, ea, off));
5778   }
5779   bTmp = newTemp(mce, mce->hWordTy, BSh);
5780
5781   switch (szB) {
5782      case 1: hFun  = (void*)&MC_(helperc_b_load1);
5783              hName = "MC_(helperc_b_load1)";
5784              break;
5785      case 2: hFun  = (void*)&MC_(helperc_b_load2);
5786              hName = "MC_(helperc_b_load2)";
5787              break;
5788      case 4: hFun  = (void*)&MC_(helperc_b_load4);
5789              hName = "MC_(helperc_b_load4)";
5790              break;
5791      case 8: hFun  = (void*)&MC_(helperc_b_load8);
5792              hName = "MC_(helperc_b_load8)";
5793              break;
5794      case 16: hFun  = (void*)&MC_(helperc_b_load16);
5795               hName = "MC_(helperc_b_load16)";
5796               break;
5797      case 32: hFun  = (void*)&MC_(helperc_b_load32);
5798               hName = "MC_(helperc_b_load32)";
5799               break;
5800      default:
5801         VG_(printf)("mc_translate.c: gen_load_b: unhandled szB == %d\n", szB);
5802         tl_assert(0);
5803   }
5804   di = unsafeIRDirty_1_N(
5805           bTmp, 1/*regparms*/, hName, VG_(fnptr_to_fnentry)( hFun ),
5806           mkIRExprVec_1( ea )
5807        );
5808   /* no need to mess with any annotations.  This call accesses
5809      neither guest state nor guest memory. */
5810   stmt( 'B', mce, IRStmt_Dirty(di) );
5811   if (mce->hWordTy == Ity_I64) {
5812      /* 64-bit host */
5813      IRTemp bTmp32 = newTemp(mce, Ity_I32, BSh);
5814      assign( 'B', mce, bTmp32, unop(Iop_64to32, mkexpr(bTmp)) );
5815      return mkexpr(bTmp32);
5816   } else {
5817      /* 32-bit host */
5818      return mkexpr(bTmp);
5819   }
5820}
5821
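/* Guarded variant of gen_load_b: if 'guard' (an Ity_I1 atom) is false
   at run time, the result is the no-origin value 0 rather than the
   loaded otag.  This is expressed with a Mux0X on the widened guard. */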
5822static IRAtom* gen_guarded_load_b ( MCEnv* mce, Int szB, IRAtom* baseaddr,
5823                                    Int offset, IRAtom* guard )
5824{
5825  if (guard) {
5826     IRAtom *cond, *iffalse, *iftrue;
5827
5828     cond    = assignNew('B', mce, Ity_I8, unop(Iop_1Uto8, guard));
5829     iftrue  = assignNew('B', mce, Ity_I32,
5830                         gen_load_b(mce, szB, baseaddr, offset));
5831     iffalse = mkU32(0);
5832
5833     return assignNew('B', mce, Ity_I32, IRExpr_Mux0X(cond, iffalse, iftrue));
5834  }
5835
5836  return gen_load_b(mce, szB, baseaddr, offset);
5837}
5838
5839/* Generate a shadow store.  guard :: Ity_I1 controls whether the
5840   store really happens; NULL means it unconditionally does. */
5841static void gen_store_b ( MCEnv* mce, Int szB,
5842                          IRAtom* baseaddr, Int offset, IRAtom* dataB,
5843                          IRAtom* guard )
5844{
5845   void*    hFun;
5846   HChar*   hName;
5847   IRDirty* di;
5848   IRType   aTy   = typeOfIRExpr( mce->sb->tyenv, baseaddr );
5849   IROp     opAdd = aTy == Ity_I32 ? Iop_Add32 : Iop_Add64;
5850   IRAtom*  ea    = baseaddr;
5851   if (guard) {
5852      tl_assert(isOriginalAtom(mce, guard));
5853      tl_assert(typeOfIRExpr(mce->sb->tyenv, guard) == Ity_I1);
5854   }
5855   if (offset != 0) {
5856      IRAtom* off = aTy == Ity_I32 ? mkU32( offset )
5857                                   : mkU64( (Long)(Int)offset );
5858      ea = assignNew(  'B', mce, aTy, binop(opAdd, ea, off));
5859   }
5860   if (mce->hWordTy == Ity_I64)
5861      dataB = assignNew( 'B', mce, Ity_I64, unop(Iop_32Uto64, dataB));
5862
5863   switch (szB) {
5864      case 1: hFun  = (void*)&MC_(helperc_b_store1);
5865              hName = "MC_(helperc_b_store1)";
5866              break;
5867      case 2: hFun  = (void*)&MC_(helperc_b_store2);
5868              hName = "MC_(helperc_b_store2)";
5869              break;
5870      case 4: hFun  = (void*)&MC_(helperc_b_store4);
5871              hName = "MC_(helperc_b_store4)";
5872              break;
5873      case 8: hFun  = (void*)&MC_(helperc_b_store8);
5874              hName = "MC_(helperc_b_store8)";
5875              break;
5876      case 16: hFun  = (void*)&MC_(helperc_b_store16);
5877               hName = "MC_(helperc_b_store16)";
5878               break;
5879      case 32: hFun  = (void*)&MC_(helperc_b_store32);
5880               hName = "MC_(helperc_b_store32)";
5881               break;
5882      default:
5883         tl_assert(0);
5884   }
5885   di = unsafeIRDirty_0_N( 2/*regparms*/,
5886           hName, VG_(fnptr_to_fnentry)( hFun ),
5887           mkIRExprVec_2( ea, dataB )
5888        );
5889   /* no need to mess with any annotations.  This call accesses
5890      neither guest state nor guest memory. */
5891   if (guard) di->guard = guard;
5892   stmt( 'B', mce, IRStmt_Dirty(di) );
5893}
5894
5895static IRAtom* narrowTo32 ( MCEnv* mce, IRAtom* e ) {
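/* Helpers to move otags between the host word size and their canonical
   32-bit form. */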
5896   IRType eTy = typeOfIRExpr(mce->sb->tyenv, e);
5897   if (eTy == Ity_I64)
5898      return assignNew( 'B', mce, Ity_I32, unop(Iop_64to32, e) );
5899   if (eTy == Ity_I32)
5900      return e;
5901   tl_assert(0);
5902}
5903
5904static IRAtom* zWidenFrom32 ( MCEnv* mce, IRType dstTy, IRAtom* e ) {
5905   IRType eTy = typeOfIRExpr(mce->sb->tyenv, e);
5906   tl_assert(eTy == Ity_I32);
5907   if (dstTy == Ity_I64)
5908      return assignNew( 'B', mce, Ity_I64, unop(Iop_32Uto64, e) );
5909   tl_assert(0);
5910}
5911
5912
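/* Compute the origin ("B" shadow) for expression 'e': an Ity_I32 otag,
   formed by folding together the otags of e's sub-expressions with
   gen_maxU32.  Constants and always-defined cases yield mkU32(0), the
   no-origin value. */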
5913static IRAtom* schemeE ( MCEnv* mce, IRExpr* e )
5914{
5915   tl_assert(MC_(clo_mc_level) == 3);
5916
5917   switch (e->tag) {
5918
5919      case Iex_GetI: {
5920         IRRegArray* descr_b;
5921         IRAtom      *t1, *t2, *t3, *t4;
5922         IRRegArray* descr      = e->Iex.GetI.descr;
5923         IRType equivIntTy
5924            = MC_(get_otrack_reg_array_equiv_int_type)(descr);
5925         /* If this array is unshadowable for whatever reason, use the
5926            usual approximation. */
5927         if (equivIntTy == Ity_INVALID)
5928            return mkU32(0);
5929         tl_assert(sizeofIRType(equivIntTy) >= 4);
5930         tl_assert(sizeofIRType(equivIntTy) == sizeofIRType(descr->elemTy));
5931         descr_b = mkIRRegArray( descr->base + 2*mce->layout->total_sizeB,
5932                                 equivIntTy, descr->nElems );
5933         /* Do a shadow indexed get of the same size, giving t1.  Take
5934            the bottom 32 bits of it, giving t2.  Compute into t3 the
5935            origin for the index (almost certainly zero, but there's
5936            no harm in being completely general here, since iropt will
5937            remove any useless code), and fold it in, giving a final
5938            value t4. */
5939         t1 = assignNew( 'B', mce, equivIntTy,
5940                          IRExpr_GetI( descr_b, e->Iex.GetI.ix,
5941                                                e->Iex.GetI.bias ));
5942         t2 = narrowTo32( mce, t1 );
5943         t3 = schemeE( mce, e->Iex.GetI.ix );
5944         t4 = gen_maxU32( mce, t2, t3 );
5945         return t4;
5946      }
5947      case Iex_CCall: {
5948         Int i;
5949         IRAtom*  here;
5950         IRExpr** args = e->Iex.CCall.args;
5951         IRAtom*  curr = mkU32(0);
5952         for (i = 0; args[i]; i++) {
5953            tl_assert(i < 32);
5954            tl_assert(isOriginalAtom(mce, args[i]));
5955            /* Only take notice of this arg if the callee's
5956               mc-exclusion mask does not say it is to be excluded. */
5957            if (e->Iex.CCall.cee->mcx_mask & (1<<i)) {
5958               /* the arg is to be excluded from definedness checking.
5959                  Do nothing. */
5960               if (0) VG_(printf)("excluding %s(%d)\n",
5961                                  e->Iex.CCall.cee->name, i);
5962            } else {
5963               /* calculate the arg's definedness, and pessimistically
5964                  merge it in. */
5965               here = schemeE( mce, args[i] );
5966               curr = gen_maxU32( mce, curr, here );
5967            }
5968         }
5969         return curr;
5970      }
5971      case Iex_Load: {
5972         Int dszB;
5973         dszB = sizeofIRType(e->Iex.Load.ty);
5974         /* assert that the B value for the address is already
5975            available (somewhere) */
5976         tl_assert(isIRAtom(e->Iex.Load.addr));
5977         tl_assert(mce->hWordTy == Ity_I32 || mce->hWordTy == Ity_I64);
5978         return gen_load_b( mce, dszB, e->Iex.Load.addr, 0 );
5979      }
5980      case Iex_Mux0X: {
5981         IRAtom* b1 = schemeE( mce, e->Iex.Mux0X.cond );
5982         IRAtom* b2 = schemeE( mce, e->Iex.Mux0X.expr0 );
5983         IRAtom* b3 = schemeE( mce, e->Iex.Mux0X.exprX );
5984         return gen_maxU32( mce, b1, gen_maxU32( mce, b2, b3 ));
5985      }
5986      case Iex_Qop: {
5987         IRAtom* b1 = schemeE( mce, e->Iex.Qop.details->arg1 );
5988         IRAtom* b2 = schemeE( mce, e->Iex.Qop.details->arg2 );
5989         IRAtom* b3 = schemeE( mce, e->Iex.Qop.details->arg3 );
5990         IRAtom* b4 = schemeE( mce, e->Iex.Qop.details->arg4 );
5991         return gen_maxU32( mce, gen_maxU32( mce, b1, b2 ),
5992                                 gen_maxU32( mce, b3, b4 ) );
5993      }
5994      case Iex_Triop: {
5995         IRAtom* b1 = schemeE( mce, e->Iex.Triop.details->arg1 );
5996         IRAtom* b2 = schemeE( mce, e->Iex.Triop.details->arg2 );
5997         IRAtom* b3 = schemeE( mce, e->Iex.Triop.details->arg3 );
5998         return gen_maxU32( mce, b1, gen_maxU32( mce, b2, b3 ) );
5999      }
6000      case Iex_Binop: {
6001         switch (e->Iex.Binop.op) {
6002            case Iop_CasCmpEQ8:  case Iop_CasCmpNE8:
6003            case Iop_CasCmpEQ16: case Iop_CasCmpNE16:
6004            case Iop_CasCmpEQ32: case Iop_CasCmpNE32:
6005            case Iop_CasCmpEQ64: case Iop_CasCmpNE64:
6006               /* Just say these all produce a defined result,
6007                  regardless of their arguments.  See
6008                  COMMENT_ON_CasCmpEQ in this file. */
6009               return mkU32(0);
6010            default: {
6011               IRAtom* b1 = schemeE( mce, e->Iex.Binop.arg1 );
6012               IRAtom* b2 = schemeE( mce, e->Iex.Binop.arg2 );
6013               return gen_maxU32( mce, b1, b2 );
6014            }
6015         }
6016         tl_assert(0);
6017         /*NOTREACHED*/
6018      }
6019      case Iex_Unop: {
6020         IRAtom* b1 = schemeE( mce, e->Iex.Unop.arg );
6021         return b1;
6022      }
6023      case Iex_Const:
6024         return mkU32(0);
6025      case Iex_RdTmp:
6026         return mkexpr( findShadowTmpB( mce, e->Iex.RdTmp.tmp ));
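      /* Guest state reads: MC_(get_otrack_shadow_offset) returns -1 for
         slots that have no origin-tracking shadow; such reads are given
         the no-origin value.  Otherwise the 32-bit B value is read from
         the origin shadow area, which lives at the original offset plus
         2 * total_sizeB. */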
6027      case Iex_Get: {
6028         Int b_offset = MC_(get_otrack_shadow_offset)(
6029                           e->Iex.Get.offset,
6030                           sizeofIRType(e->Iex.Get.ty)
6031                        );
6032         tl_assert(b_offset >= -1
6033                   && b_offset <= mce->layout->total_sizeB -4);
6034         if (b_offset >= 0) {
6035            /* FIXME: this isn't an atom! */
6036            return IRExpr_Get( b_offset + 2*mce->layout->total_sizeB,
6037                               Ity_I32 );
6038         }
6039         return mkU32(0);
6040      }
6041      default:
6042         VG_(printf)("mc_translate.c: schemeE: unhandled: ");
6043         ppIRExpr(e);
6044         VG_(tool_panic)("memcheck:schemeE");
6045   }
6046}
6047
6048
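/* Generate origin-tracking (B value) instrumentation for a dirty helper
   call.  The origins of the guard, of the unmasked arguments and of any
   guest state or memory the helper reads are folded together with
   gen_maxU32; the resulting single origin is then propagated to the
   destination temporary, to written guest state and to written memory,
   as appropriate. */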
6049static void do_origins_Dirty ( MCEnv* mce, IRDirty* d )
6050{
6051   // This is a hacked version of do_shadow_Dirty
6052   Int       i, k, n, toDo, gSz, gOff;
6053   IRAtom    *here, *curr;
6054   IRTemp    dst;
6055
6056   /* First check the guard. */
6057   curr = schemeE( mce, d->guard );
6058
6059   /* Now round up all inputs and maxU32 over them. */
6060
6061   /* Inputs: unmasked args
6062      Note: arguments are evaluated REGARDLESS of the guard expression */
6063   for (i = 0; d->args[i]; i++) {
6064      if (d->cee->mcx_mask & (1<<i)) {
6065         /* ignore this arg */
6066      } else {
6067         here = schemeE( mce, d->args[i] );
6068         curr = gen_maxU32( mce, curr, here );
6069      }
6070   }
6071
6072   /* Inputs: guest state that we read. */
6073   for (i = 0; i < d->nFxState; i++) {
6074      tl_assert(d->fxState[i].fx != Ifx_None);
6075      if (d->fxState[i].fx == Ifx_Write)
6076         continue;
6077
6078      /* Enumerate the described state segments */
6079      for (k = 0; k < 1 + d->fxState[i].nRepeats; k++) {
6080         gOff = d->fxState[i].offset + k * d->fxState[i].repeatLen;
6081         gSz  = d->fxState[i].size;
6082
6083         /* Ignore any sections marked as 'always defined'. */
6084         if (isAlwaysDefd(mce, gOff, gSz)) {
6085            if (0)
6086            VG_(printf)("memcheck: Dirty gst: ignored off %d, sz %d\n",
6087                        gOff, gSz);
6088            continue;
6089         }
6090
6091         /* This state element is read or modified.  So we need to
6092            consider it.  If larger than 4 bytes, deal with it in
6093            4-byte chunks. */
6094         while (True) {
6095            Int b_offset;
6096            tl_assert(gSz >= 0);
6097            if (gSz == 0) break;
6098            n = gSz <= 4 ? gSz : 4;
6099            /* update 'curr' with maxU32 of the state slice
6100               gOff .. gOff+n-1 */
6101            b_offset = MC_(get_otrack_shadow_offset)(gOff, 4);
6102            if (b_offset != -1) {
6103               /* Observe the guard expression. If it is false use 0, i.e.
6104                  nothing is known about the origin */
6105               IRAtom *cond, *iffalse, *iftrue;
6106
6107               cond = assignNew( 'B', mce, Ity_I8, unop(Iop_1Uto8, d->guard));
6108               iffalse = mkU32(0);
6109               iftrue  = assignNew( 'B', mce, Ity_I32,
6110                                    IRExpr_Get(b_offset
6111                                                 + 2*mce->layout->total_sizeB,
6112                                               Ity_I32));
6113               here = assignNew( 'B', mce, Ity_I32,
6114                                 IRExpr_Mux0X(cond, iffalse, iftrue));
6115               curr = gen_maxU32( mce, curr, here );
6116            }
6117            gSz -= n;
6118            gOff += n;
6119         }
6120      }
6121   }
6122
6123   /* Inputs: memory */
6124
6125   if (d->mFx != Ifx_None) {
6126      /* Because we may do multiple shadow loads/stores from the same
6127         base address, it's best to do a single test of its
6128         definedness right now.  Post-instrumentation optimisation
6129         should remove all but this test. */
6130      tl_assert(d->mAddr);
6131      here = schemeE( mce, d->mAddr );
6132      curr = gen_maxU32( mce, curr, here );
6133   }
6134
6135   /* Deal with memory inputs (reads or modifies) */
6136   if (d->mFx == Ifx_Read || d->mFx == Ifx_Modify) {
6137      toDo   = d->mSize;
6138      /* chew off 32-bit chunks.  We don't care about the endianness
6139         since it's all going to be folded into a single 32-bit origin,
6140         but nevertheless choose an endianness which is hopefully
6141         native to the platform. */
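      /* For example, mSize == 7 gives guarded B loads of widths 4, 2
         and 1 at offsets 0, 4 and 6 respectively, each folded into
         'curr' with gen_maxU32. */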
6142      while (toDo >= 4) {
6143         here = gen_guarded_load_b( mce, 4, d->mAddr, d->mSize - toDo,
6144                                    d->guard );
6145         curr = gen_maxU32( mce, curr, here );
6146         toDo -= 4;
6147      }
6148      /* handle possible 16-bit excess */
6149      while (toDo >= 2) {
6150         here = gen_guarded_load_b( mce, 2, d->mAddr, d->mSize - toDo,
6151                                    d->guard );
6152         curr = gen_maxU32( mce, curr, here );
6153         toDo -= 2;
6154      }
6155      /* chew off the remaining 8-bit chunk, if any */
6156      if (toDo == 1) {
6157         here = gen_guarded_load_b( mce, 1, d->mAddr, d->mSize - toDo,
6158                                    d->guard );
6159         curr = gen_maxU32( mce, curr, here );
6160         toDo -= 1;
6161      }
6162      tl_assert(toDo == 0);
6163   }
6164
6165   /* Whew!  So curr is a 32-bit B-value which should give an origin
6166      of some use if any of the inputs to the helper are undefined.
6167      Now we need to re-distribute the results to all destinations. */
6168
6169   /* Outputs: the destination temporary, if there is one. */
6170   if (d->tmp != IRTemp_INVALID) {
6171      dst   = findShadowTmpB(mce, d->tmp);
6172      assign( 'B', mce, dst, curr );
6173   }
6174
6175   /* Outputs: guest state that we write or modify. */
6176   for (i = 0; i < d->nFxState; i++) {
6177      tl_assert(d->fxState[i].fx != Ifx_None);
6178      if (d->fxState[i].fx == Ifx_Read)
6179         continue;
6180
6181      /* Enumerate the described state segments */
6182      for (k = 0; k < 1 + d->fxState[i].nRepeats; k++) {
6183         gOff = d->fxState[i].offset + k * d->fxState[i].repeatLen;
6184         gSz  = d->fxState[i].size;
6185
6186         /* Ignore any sections marked as 'always defined'. */
6187         if (isAlwaysDefd(mce, gOff, gSz))
6188            continue;
6189
6190         /* This state element is written or modified.  So we need to
6191            consider it.  If larger than 4 bytes, deal with it in
6192            4-byte chunks. */
6193         while (True) {
6194            Int b_offset;
6195            tl_assert(gSz >= 0);
6196            if (gSz == 0) break;
6197            n = gSz <= 4 ? gSz : 4;
6198            /* Write 'curr' to the state slice gOff .. gOff+n-1 */
6199            b_offset = MC_(get_otrack_shadow_offset)(gOff, 4);
6200            if (b_offset != -1) {
6201               if (d->guard) {
6202                  /* If the guard expression evaluates to false we simply Put
6203                     the value that is already stored in the guest state slot */
6204                  IRAtom *cond, *iffalse;
6205
6206                  cond    = assignNew('B', mce, Ity_I8,
6207                                      unop(Iop_1Uto8, d->guard));
6208                  iffalse = assignNew('B', mce, Ity_I32,
6209                                      IRExpr_Get(b_offset +
6210                                                 2*mce->layout->total_sizeB,
6211                                                 Ity_I32));
6212                  curr = assignNew('B', mce, Ity_I32,
6213                                   IRExpr_Mux0X(cond, iffalse, curr));
6214               }
6215               stmt( 'B', mce, IRStmt_Put(b_offset
6216                                             + 2*mce->layout->total_sizeB,
6217                                          curr ));
6218            }
6219            gSz -= n;
6220            gOff += n;
6221         }
6222      }
6223   }
6224
6225   /* Outputs: memory that we write or modify.  Same comments about
6226      endianness as above apply. */
6227   if (d->mFx == Ifx_Write || d->mFx == Ifx_Modify) {
6228      toDo   = d->mSize;
6229      /* chew off 32-bit chunks */
6230      while (toDo >= 4) {
6231         gen_store_b( mce, 4, d->mAddr, d->mSize - toDo, curr,
6232                      d->guard );
6233         toDo -= 4;
6234      }
6235      /* handle possible 16-bit excess */
6236      while (toDo >= 2) {
6237         gen_store_b( mce, 2, d->mAddr, d->mSize - toDo, curr,
6238                      d->guard );
6239         toDo -= 2;
6240      }
6241      /* chew off the remaining 8-bit chunk, if any */
6242      if (toDo == 1) {
6243         gen_store_b( mce, 1, d->mAddr, d->mSize - toDo, curr,
6244                      d->guard );
6245         toDo -= 1;
6246      }
6247      tl_assert(toDo == 0);
6248   }
6249}
6250
6251
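/* Generate origin-tracking instrumentation for an ordinary store:
   compute the B value of the data being stored and write it to the
   origin shadow of the addressed memory via gen_store_b, with no guard
   since the store is unconditional. */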
6252static void do_origins_Store ( MCEnv* mce,
6253                               IREndness stEnd,
6254                               IRExpr* stAddr,
6255                               IRExpr* stData )
6256{
6257   Int     dszB;
6258   IRAtom* dataB;
6259   /* assert that the B value for the address is already available
6260      (somewhere), since the call to schemeE will want to see it.
6261      XXXX how does this actually ensure that?? */
6262   tl_assert(isIRAtom(stAddr));
6263   tl_assert(isIRAtom(stData));
6264   dszB  = sizeofIRType( typeOfIRExpr(mce->sb->tyenv, stData ) );
6265   dataB = schemeE( mce, stData );
6266   gen_store_b( mce, dszB, stAddr, 0/*offset*/, dataB,
6267                     NULL/*guard*/ );
6268}
6269
6270
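/* Generate origin-tracking (B value) instrumentation for a single
   statement.  Only reached when origin tracking is enabled
   (--track-origins=yes, i.e. MC_(clo_mc_level) == 3). */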
6271static void schemeS ( MCEnv* mce, IRStmt* st )
6272{
6273   tl_assert(MC_(clo_mc_level) == 3);
6274
6275   switch (st->tag) {
6276
6277      case Ist_AbiHint:
6278         /* The value-check instrumenter handles this - by arranging
6279            to pass the address of the next instruction to
6280            MC_(helperc_MAKE_STACK_UNINIT).  This is all that needs to
6281            happen for origin tracking w.r.t. AbiHints.  So there is
6282            nothing to do here. */
6283         break;
6284
6285      case Ist_PutI: {
6286         IRPutI *puti = st->Ist.PutI.details;
6287         IRRegArray* descr_b;
6288         IRAtom      *t1, *t2, *t3, *t4;
6289         IRRegArray* descr = puti->descr;
6290         IRType equivIntTy
6291            = MC_(get_otrack_reg_array_equiv_int_type)(descr);
6292         /* If this array is unshadowable for whatever reason,
6293            generate no code. */
6294         if (equivIntTy == Ity_INVALID)
6295            break;
6296         tl_assert(sizeofIRType(equivIntTy) >= 4);
6297         tl_assert(sizeofIRType(equivIntTy) == sizeofIRType(descr->elemTy));
6298         descr_b
6299            = mkIRRegArray( descr->base + 2*mce->layout->total_sizeB,
6300                            equivIntTy, descr->nElems );
6301         /* Compute a value to Put - the conjoinment of the origin for
6302            the data to be Put-ted (obviously) and of the index value
6303            (not so obviously). */
6304         t1 = schemeE( mce, puti->data );
6305         t2 = schemeE( mce, puti->ix );
6306         t3 = gen_maxU32( mce, t1, t2 );
6307         t4 = zWidenFrom32( mce, equivIntTy, t3 );
6308         stmt( 'B', mce, IRStmt_PutI( mkIRPutI(descr_b, puti->ix,
6309                                               puti->bias, t4) ));
6310         break;
6311      }
6312
6313      case Ist_Dirty:
6314         do_origins_Dirty( mce, st->Ist.Dirty.details );
6315         break;
6316
6317      case Ist_Store:
6318         do_origins_Store( mce, st->Ist.Store.end,
6319                                st->Ist.Store.addr,
6320                                st->Ist.Store.data );
6321         break;
6322
6323      case Ist_LLSC: {
6324         /* In short: treat a load-linked like a normal load followed
6325            by an assignment of the loaded (shadow) data to the result
6326            temporary.  Treat a store-conditional like a normal store,
6327            and mark the result temporary as defined. */
6328         if (st->Ist.LLSC.storedata == NULL) {
6329            /* Load Linked */
6330            IRType resTy
6331               = typeOfIRTemp(mce->sb->tyenv, st->Ist.LLSC.result);
6332            IRExpr* vanillaLoad
6333               = IRExpr_Load(st->Ist.LLSC.end, resTy, st->Ist.LLSC.addr);
6334            tl_assert(resTy == Ity_I64 || resTy == Ity_I32
6335                      || resTy == Ity_I16 || resTy == Ity_I8);
6336            assign( 'B', mce, findShadowTmpB(mce, st->Ist.LLSC.result),
6337                              schemeE(mce, vanillaLoad));
6338         } else {
6339            /* Store conditional */
6340            do_origins_Store( mce, st->Ist.LLSC.end,
6341                                   st->Ist.LLSC.addr,
6342                                   st->Ist.LLSC.storedata );
6343            /* For the rationale behind this, see comments at the
6344               place where the V-shadow for .result is constructed, in
6345               do_shadow_LLSC.  In short, we regard .result as
6346               always-defined. */
6347            assign( 'B', mce, findShadowTmpB(mce, st->Ist.LLSC.result),
6348                              mkU32(0) );
6349         }
6350         break;
6351      }
6352
6353      case Ist_Put: {
6354         Int b_offset
6355            = MC_(get_otrack_shadow_offset)(
6356                 st->Ist.Put.offset,
6357                 sizeofIRType(typeOfIRExpr(mce->sb->tyenv, st->Ist.Put.data))
6358              );
6359         if (b_offset >= 0) {
6360            /* FIXME: this isn't an atom! */
6361            stmt( 'B', mce, IRStmt_Put(b_offset + 2*mce->layout->total_sizeB,
6362                                       schemeE( mce, st->Ist.Put.data )) );
6363         }
6364         break;
6365      }
6366
6367      case Ist_WrTmp:
6368         assign( 'B', mce, findShadowTmpB(mce, st->Ist.WrTmp.tmp),
6369                           schemeE(mce, st->Ist.WrTmp.data) );
6370         break;
6371
6372      case Ist_MBE:
6373      case Ist_NoOp:
6374      case Ist_Exit:
6375      case Ist_IMark:
6376         break;
6377
6378      default:
6379         VG_(printf)("mc_translate.c: schemeS: unhandled: ");
6380         ppIRStmt(st);
6381         VG_(tool_panic)("memcheck:schemeS");
6382   }
6383}
6384
6385
6386/*--------------------------------------------------------------------*/
6387/*--- end                                           mc_translate.c ---*/
6388/*--------------------------------------------------------------------*/
6389