1
2/*--------------------------------------------------------------------*/
3/*--- Instrument IR to perform memory checking operations.         ---*/
4/*---                                               mc_translate.c ---*/
5/*--------------------------------------------------------------------*/
6
7/*
8   This file is part of MemCheck, a heavyweight Valgrind tool for
9   detecting memory errors.
10
11   Copyright (C) 2000-2010 Julian Seward
12      jseward@acm.org
13
14   This program is free software; you can redistribute it and/or
15   modify it under the terms of the GNU General Public License as
16   published by the Free Software Foundation; either version 2 of the
17   License, or (at your option) any later version.
18
19   This program is distributed in the hope that it will be useful, but
20   WITHOUT ANY WARRANTY; without even the implied warranty of
21   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
22   General Public License for more details.
23
24   You should have received a copy of the GNU General Public License
25   along with this program; if not, write to the Free Software
26   Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA
27   02111-1307, USA.
28
29   The GNU General Public License is contained in the file COPYING.
30*/
31
32#include "pub_tool_basics.h"
33#include "pub_tool_hashtable.h"     // For mc_include.h
34#include "pub_tool_libcassert.h"
35#include "pub_tool_libcprint.h"
36#include "pub_tool_tooliface.h"
37#include "pub_tool_machine.h"     // VG_(fnptr_to_fnentry)
38#include "pub_tool_xarray.h"
39#include "pub_tool_mallocfree.h"
40#include "pub_tool_libcbase.h"
41
42#include "mc_include.h"
43
44
45/* This file implements the Memcheck instrumentation, and in
46   particular contains the core of its undefined value detection
47   machinery.  For a comprehensive background of the terminology,
48   algorithms and rationale used herein, read:
49
50     Using Valgrind to detect undefined value errors with
51     bit-precision
52
53     Julian Seward and Nicholas Nethercote
54
55     2005 USENIX Annual Technical Conference (General Track),
56     Anaheim, CA, USA, April 10-15, 2005.
57
58   ----
59
60   Here is as good a place as any to record exactly when V bits are and
61   should be checked, why, and what function is responsible.
62
63
64   Memcheck complains when an undefined value is used:
65
66   1. In the condition of a conditional branch.  Because it could cause
67      incorrect control flow, and thus cause incorrect externally-visible
68      behaviour.  [mc_translate.c:complainIfUndefined]
69
70   2. As an argument to a system call, or as the value that specifies
71      the system call number.  Because it could cause an incorrect
72      externally-visible side effect.  [mc_translate.c:mc_pre_reg_read]
73
74   3. As the address in a load or store.  Because it could cause an
75      incorrect value to be used later, which could cause externally-visible
76      behaviour (eg. via incorrect control flow or an incorrect system call
77      argument)  [complainIfUndefined]
78
79   4. As the target address of a branch.  Because it could cause incorrect
80      control flow.  [complainIfUndefined]
81
82   5. As an argument to setenv, unsetenv, or putenv.  Because it could put
83      an incorrect value into the external environment.
84      [mc_replace_strmem.c:VG_WRAP_FUNCTION_ZU(*, *env)]
85
86   6. As the index in a GETI or PUTI operation.  I'm not sure why... (njn).
87      [complainIfUndefined]
88
89   7. As an argument to the VALGRIND_CHECK_MEM_IS_DEFINED and
90      VALGRIND_CHECK_VALUE_IS_DEFINED client requests.  Because the user
91      requested it.  [in memcheck.h]
92
93
94   Memcheck also complains, but should not, when an undefined value is used:
95
96   8. As the shift value in certain SIMD shift operations (but not in the
97      standard integer shift operations).  This inconsistency is due to
98      historical reasons.)  [complainIfUndefined]
99
100
101   Memcheck does not complain, but should, when an undefined value is used:
102
103   9. As an input to a client request.  Because the client request may
104      affect the visible behaviour -- see bug #144362 for an example
105      involving the malloc replacements in vg_replace_malloc.c and
106      VALGRIND_NON_SIMD_CALL* requests, where an uninitialised argument
107      isn't identified.  That bug report also has some info on how to solve
108      the problem.  [valgrind.h:VALGRIND_DO_CLIENT_REQUEST]
109
110
111   In practice, 1 and 2 account for the vast majority of cases.
112*/
113
114/*------------------------------------------------------------*/
115/*--- Forward decls                                        ---*/
116/*------------------------------------------------------------*/
117
118struct _MCEnv;
119
120static IRType  shadowTypeV ( IRType ty );
121static IRExpr* expr2vbits ( struct _MCEnv* mce, IRExpr* e );
122static IRTemp  findShadowTmpB ( struct _MCEnv* mce, IRTemp orig );
123
124
125/*------------------------------------------------------------*/
126/*--- Memcheck running state, and tmp management.          ---*/
127/*------------------------------------------------------------*/
128
129/* Carries info about a particular tmp.  The tmp's number is not
130   recorded, as this is implied by (equal to) its index in the tmpMap
131   in MCEnv.  The tmp's type is also not recorded, as this is present
132   in MCEnv.sb->tyenv.
133
134   When .kind is Orig, .shadowV and .shadowB may give the identities
135   of the temps currently holding the associated definedness (shadowV)
136   and origin (shadowB) values, or these may be IRTemp_INVALID if code
137   to compute such values has not yet been emitted.
138
139   When .kind is VSh or BSh then the tmp is holds a V- or B- value,
140   and so .shadowV and .shadowB must be IRTemp_INVALID, since it is
141   illogical for a shadow tmp itself to be shadowed.
142*/
143typedef
144   enum { Orig=1, VSh=2, BSh=3 }
145   TempKind;
146
147typedef
148   struct {
149      TempKind kind;
150      IRTemp   shadowV;
151      IRTemp   shadowB;
152   }
153   TempMapEnt;
154
155
156/* Carries around state during memcheck instrumentation. */
157typedef
158   struct _MCEnv {
159      /* MODIFIED: the superblock being constructed.  IRStmts are
160         added. */
161      IRSB* sb;
162      Bool  trace;
163
164      /* MODIFIED: a table [0 .. #temps_in_sb-1] which gives the
165         current kind and possibly shadow temps for each temp in the
166         IRSB being constructed.  Note that it does not contain the
167         type of each tmp.  If you want to know the type, look at the
168         relevant entry in sb->tyenv.  It follows that at all times
169         during the instrumentation process, the valid indices for
170         tmpMap and sb->tyenv are identical, being 0 .. N-1 where N is
171         total number of Orig, V- and B- temps allocated so far.
172
173         The reason for this strange split (types in one place, all
174         other info in another) is that we need the types to be
175         attached to sb so as to make it possible to do
176         "typeOfIRExpr(mce->bb->tyenv, ...)" at various places in the
177         instrumentation process. */
178      XArray* /* of TempMapEnt */ tmpMap;
179
180      /* MODIFIED: indicates whether "bogus" literals have so far been
181         found.  Starts off False, and may change to True. */
182      Bool    bogusLiterals;
183
184      /* READONLY: the guest layout.  This indicates which parts of
185         the guest state should be regarded as 'always defined'. */
186      VexGuestLayout* layout;
187
188      /* READONLY: the host word type.  Needed for constructing
189         arguments of type 'HWord' to be passed to helper functions.
190         Ity_I32 or Ity_I64 only. */
191      IRType hWordTy;
192   }
193   MCEnv;
194
195/* SHADOW TMP MANAGEMENT.  Shadow tmps are allocated lazily (on
196   demand), as they are encountered.  This is for two reasons.
197
198   (1) (less important reason): Many original tmps are unused due to
199   initial IR optimisation, and we do not want to spaces in tables
200   tracking them.
201
202   Shadow IRTemps are therefore allocated on demand.  mce.tmpMap is a
203   table indexed [0 .. n_types-1], which gives the current shadow for
204   each original tmp, or INVALID_IRTEMP if none is so far assigned.
205   It is necessary to support making multiple assignments to a shadow
206   -- specifically, after testing a shadow for definedness, it needs
207   to be made defined.  But IR's SSA property disallows this.
208
209   (2) (more important reason): Therefore, when a shadow needs to get
210   a new value, a new temporary is created, the value is assigned to
211   that, and the tmpMap is updated to reflect the new binding.
212
213   A corollary is that if the tmpMap maps a given tmp to
214   IRTemp_INVALID and we are hoping to read that shadow tmp, it means
215   there's a read-before-write error in the original tmps.  The IR
216   sanity checker should catch all such anomalies, however.
217*/
218
219/* Create a new IRTemp of type 'ty' and kind 'kind', and add it to
220   both the table in mce->sb and to our auxiliary mapping.  Note that
221   newTemp may cause mce->tmpMap to resize, hence previous results
222   from VG_(indexXA)(mce->tmpMap) are invalidated. */
223static IRTemp newTemp ( MCEnv* mce, IRType ty, TempKind kind )
224{
225   Word       newIx;
226   TempMapEnt ent;
227   IRTemp     tmp = newIRTemp(mce->sb->tyenv, ty);
228   ent.kind    = kind;
229   ent.shadowV = IRTemp_INVALID;
230   ent.shadowB = IRTemp_INVALID;
231   newIx = VG_(addToXA)( mce->tmpMap, &ent );
232   tl_assert(newIx == (Word)tmp);
233   return tmp;
234}
235
236
237/* Find the tmp currently shadowing the given original tmp.  If none
238   so far exists, allocate one.  */
239static IRTemp findShadowTmpV ( MCEnv* mce, IRTemp orig )
240{
241   TempMapEnt* ent;
242   /* VG_(indexXA) range-checks 'orig', hence no need to check
243      here. */
244   ent = (TempMapEnt*)VG_(indexXA)( mce->tmpMap, (Word)orig );
245   tl_assert(ent->kind == Orig);
246   if (ent->shadowV == IRTemp_INVALID) {
247      IRTemp tmpV
248        = newTemp( mce, shadowTypeV(mce->sb->tyenv->types[orig]), VSh );
249      /* newTemp may cause mce->tmpMap to resize, hence previous results
250         from VG_(indexXA) are invalid. */
251      ent = (TempMapEnt*)VG_(indexXA)( mce->tmpMap, (Word)orig );
252      tl_assert(ent->kind == Orig);
253      tl_assert(ent->shadowV == IRTemp_INVALID);
254      ent->shadowV = tmpV;
255   }
256   return ent->shadowV;
257}
258
259/* Allocate a new shadow for the given original tmp.  This means any
260   previous shadow is abandoned.  This is needed because it is
261   necessary to give a new value to a shadow once it has been tested
262   for undefinedness, but unfortunately IR's SSA property disallows
263   this.  Instead we must abandon the old shadow, allocate a new one
264   and use that instead.
265
266   This is the same as findShadowTmpV, except we don't bother to see
267   if a shadow temp already existed -- we simply allocate a new one
268   regardless. */
269static void newShadowTmpV ( MCEnv* mce, IRTemp orig )
270{
271   TempMapEnt* ent;
272   /* VG_(indexXA) range-checks 'orig', hence no need to check
273      here. */
274   ent = (TempMapEnt*)VG_(indexXA)( mce->tmpMap, (Word)orig );
275   tl_assert(ent->kind == Orig);
276   if (1) {
277      IRTemp tmpV
278        = newTemp( mce, shadowTypeV(mce->sb->tyenv->types[orig]), VSh );
279      /* newTemp may cause mce->tmpMap to resize, hence previous results
280         from VG_(indexXA) are invalid. */
281      ent = (TempMapEnt*)VG_(indexXA)( mce->tmpMap, (Word)orig );
282      tl_assert(ent->kind == Orig);
283      ent->shadowV = tmpV;
284   }
285}
286
287
288/*------------------------------------------------------------*/
289/*--- IRAtoms -- a subset of IRExprs                       ---*/
290/*------------------------------------------------------------*/
291
292/* An atom is either an IRExpr_Const or an IRExpr_Tmp, as defined by
293   isIRAtom() in libvex_ir.h.  Because this instrumenter expects flat
294   input, most of this code deals in atoms.  Usefully, a value atom
295   always has a V-value which is also an atom: constants are shadowed
296   by constants, and temps are shadowed by the corresponding shadow
297   temporary. */
298
299typedef  IRExpr  IRAtom;
300
301/* (used for sanity checks only): is this an atom which looks
302   like it's from original code? */
303static Bool isOriginalAtom ( MCEnv* mce, IRAtom* a1 )
304{
305   if (a1->tag == Iex_Const)
306      return True;
307   if (a1->tag == Iex_RdTmp) {
308      TempMapEnt* ent = VG_(indexXA)( mce->tmpMap, a1->Iex.RdTmp.tmp );
309      return ent->kind == Orig;
310   }
311   return False;
312}
313
314/* (used for sanity checks only): is this an atom which looks
315   like it's from shadow code? */
316static Bool isShadowAtom ( MCEnv* mce, IRAtom* a1 )
317{
318   if (a1->tag == Iex_Const)
319      return True;
320   if (a1->tag == Iex_RdTmp) {
321      TempMapEnt* ent = VG_(indexXA)( mce->tmpMap, a1->Iex.RdTmp.tmp );
322      return ent->kind == VSh || ent->kind == BSh;
323   }
324   return False;
325}
326
327/* (used for sanity checks only): check that both args are atoms and
328   are identically-kinded. */
329static Bool sameKindedAtoms ( IRAtom* a1, IRAtom* a2 )
330{
331   if (a1->tag == Iex_RdTmp && a2->tag == Iex_RdTmp)
332      return True;
333   if (a1->tag == Iex_Const && a2->tag == Iex_Const)
334      return True;
335   return False;
336}
337
338
339/*------------------------------------------------------------*/
340/*--- Type management                                      ---*/
341/*------------------------------------------------------------*/
342
343/* Shadow state is always accessed using integer types.  This returns
344   an integer type with the same size (as per sizeofIRType) as the
345   given type.  The only valid shadow types are Bit, I8, I16, I32,
346   I64, V128. */
347
348static IRType shadowTypeV ( IRType ty )
349{
350   switch (ty) {
351      case Ity_I1:
352      case Ity_I8:
353      case Ity_I16:
354      case Ity_I32:
355      case Ity_I64:
356      case Ity_I128: return ty;
357      case Ity_F32:  return Ity_I32;
358      case Ity_F64:  return Ity_I64;
359      case Ity_V128: return Ity_V128;
360      default: ppIRType(ty);
361               VG_(tool_panic)("memcheck:shadowTypeV");
362   }
363}
364
365/* Produce a 'defined' value of the given shadow type.  Should only be
366   supplied shadow types (Bit/I8/I16/I32/UI64). */
367static IRExpr* definedOfType ( IRType ty ) {
368   switch (ty) {
369      case Ity_I1:   return IRExpr_Const(IRConst_U1(False));
370      case Ity_I8:   return IRExpr_Const(IRConst_U8(0));
371      case Ity_I16:  return IRExpr_Const(IRConst_U16(0));
372      case Ity_I32:  return IRExpr_Const(IRConst_U32(0));
373      case Ity_I64:  return IRExpr_Const(IRConst_U64(0));
374      case Ity_V128: return IRExpr_Const(IRConst_V128(0x0000));
375      default:       VG_(tool_panic)("memcheck:definedOfType");
376   }
377}
378
379
380/*------------------------------------------------------------*/
381/*--- Constructing IR fragments                            ---*/
382/*------------------------------------------------------------*/
383
384/* add stmt to a bb */
385static inline void stmt ( HChar cat, MCEnv* mce, IRStmt* st ) {
386   if (mce->trace) {
387      VG_(printf)("  %c: ", cat);
388      ppIRStmt(st);
389      VG_(printf)("\n");
390   }
391   addStmtToIRSB(mce->sb, st);
392}
393
394/* assign value to tmp */
395static inline
396void assign ( HChar cat, MCEnv* mce, IRTemp tmp, IRExpr* expr ) {
397   stmt(cat, mce, IRStmt_WrTmp(tmp,expr));
398}
399
400/* build various kinds of expressions */
401#define triop(_op, _arg1, _arg2, _arg3) \
402                                 IRExpr_Triop((_op),(_arg1),(_arg2),(_arg3))
403#define binop(_op, _arg1, _arg2) IRExpr_Binop((_op),(_arg1),(_arg2))
404#define unop(_op, _arg)          IRExpr_Unop((_op),(_arg))
405#define mkU8(_n)                 IRExpr_Const(IRConst_U8(_n))
406#define mkU16(_n)                IRExpr_Const(IRConst_U16(_n))
407#define mkU32(_n)                IRExpr_Const(IRConst_U32(_n))
408#define mkU64(_n)                IRExpr_Const(IRConst_U64(_n))
409#define mkV128(_n)               IRExpr_Const(IRConst_V128(_n))
410#define mkexpr(_tmp)             IRExpr_RdTmp((_tmp))
411
412/* Bind the given expression to a new temporary, and return the
413   temporary.  This effectively converts an arbitrary expression into
414   an atom.
415
416   'ty' is the type of 'e' and hence the type that the new temporary
417   needs to be.  But passing it in is redundant, since we can deduce
418   the type merely by inspecting 'e'.  So at least use that fact to
419   assert that the two types agree. */
420static IRAtom* assignNew ( HChar cat, MCEnv* mce, IRType ty, IRExpr* e )
421{
422   TempKind k;
423   IRTemp   t;
424   IRType   tyE = typeOfIRExpr(mce->sb->tyenv, e);
425   tl_assert(tyE == ty); /* so 'ty' is redundant (!) */
426   switch (cat) {
427      case 'V': k = VSh;  break;
428      case 'B': k = BSh;  break;
429      case 'C': k = Orig; break;
430                /* happens when we are making up new "orig"
431                   expressions, for IRCAS handling */
432      default: tl_assert(0);
433   }
434   t = newTemp(mce, ty, k);
435   assign(cat, mce, t, e);
436   return mkexpr(t);
437}
438
439
440/*------------------------------------------------------------*/
441/*--- Constructing definedness primitive ops               ---*/
442/*------------------------------------------------------------*/
443
444/* --------- Defined-if-either-defined --------- */
445
446static IRAtom* mkDifD8 ( MCEnv* mce, IRAtom* a1, IRAtom* a2 ) {
447   tl_assert(isShadowAtom(mce,a1));
448   tl_assert(isShadowAtom(mce,a2));
449   return assignNew('V', mce, Ity_I8, binop(Iop_And8, a1, a2));
450}
451
452static IRAtom* mkDifD16 ( MCEnv* mce, IRAtom* a1, IRAtom* a2 ) {
453   tl_assert(isShadowAtom(mce,a1));
454   tl_assert(isShadowAtom(mce,a2));
455   return assignNew('V', mce, Ity_I16, binop(Iop_And16, a1, a2));
456}
457
458static IRAtom* mkDifD32 ( MCEnv* mce, IRAtom* a1, IRAtom* a2 ) {
459   tl_assert(isShadowAtom(mce,a1));
460   tl_assert(isShadowAtom(mce,a2));
461   return assignNew('V', mce, Ity_I32, binop(Iop_And32, a1, a2));
462}
463
464static IRAtom* mkDifD64 ( MCEnv* mce, IRAtom* a1, IRAtom* a2 ) {
465   tl_assert(isShadowAtom(mce,a1));
466   tl_assert(isShadowAtom(mce,a2));
467   return assignNew('V', mce, Ity_I64, binop(Iop_And64, a1, a2));
468}
469
470static IRAtom* mkDifDV128 ( MCEnv* mce, IRAtom* a1, IRAtom* a2 ) {
471   tl_assert(isShadowAtom(mce,a1));
472   tl_assert(isShadowAtom(mce,a2));
473   return assignNew('V', mce, Ity_V128, binop(Iop_AndV128, a1, a2));
474}
475
476/* --------- Undefined-if-either-undefined --------- */
477
478static IRAtom* mkUifU8 ( MCEnv* mce, IRAtom* a1, IRAtom* a2 ) {
479   tl_assert(isShadowAtom(mce,a1));
480   tl_assert(isShadowAtom(mce,a2));
481   return assignNew('V', mce, Ity_I8, binop(Iop_Or8, a1, a2));
482}
483
484static IRAtom* mkUifU16 ( MCEnv* mce, IRAtom* a1, IRAtom* a2 ) {
485   tl_assert(isShadowAtom(mce,a1));
486   tl_assert(isShadowAtom(mce,a2));
487   return assignNew('V', mce, Ity_I16, binop(Iop_Or16, a1, a2));
488}
489
490static IRAtom* mkUifU32 ( MCEnv* mce, IRAtom* a1, IRAtom* a2 ) {
491   tl_assert(isShadowAtom(mce,a1));
492   tl_assert(isShadowAtom(mce,a2));
493   return assignNew('V', mce, Ity_I32, binop(Iop_Or32, a1, a2));
494}
495
496static IRAtom* mkUifU64 ( MCEnv* mce, IRAtom* a1, IRAtom* a2 ) {
497   tl_assert(isShadowAtom(mce,a1));
498   tl_assert(isShadowAtom(mce,a2));
499   return assignNew('V', mce, Ity_I64, binop(Iop_Or64, a1, a2));
500}
501
502static IRAtom* mkUifUV128 ( MCEnv* mce, IRAtom* a1, IRAtom* a2 ) {
503   tl_assert(isShadowAtom(mce,a1));
504   tl_assert(isShadowAtom(mce,a2));
505   return assignNew('V', mce, Ity_V128, binop(Iop_OrV128, a1, a2));
506}
507
508static IRAtom* mkUifU ( MCEnv* mce, IRType vty, IRAtom* a1, IRAtom* a2 ) {
509   switch (vty) {
510      case Ity_I8:   return mkUifU8(mce, a1, a2);
511      case Ity_I16:  return mkUifU16(mce, a1, a2);
512      case Ity_I32:  return mkUifU32(mce, a1, a2);
513      case Ity_I64:  return mkUifU64(mce, a1, a2);
514      case Ity_V128: return mkUifUV128(mce, a1, a2);
515      default:
516         VG_(printf)("\n"); ppIRType(vty); VG_(printf)("\n");
517         VG_(tool_panic)("memcheck:mkUifU");
518   }
519}
520
521/* --------- The Left-family of operations. --------- */
522
523static IRAtom* mkLeft8 ( MCEnv* mce, IRAtom* a1 ) {
524   tl_assert(isShadowAtom(mce,a1));
525   return assignNew('V', mce, Ity_I8, unop(Iop_Left8, a1));
526}
527
528static IRAtom* mkLeft16 ( MCEnv* mce, IRAtom* a1 ) {
529   tl_assert(isShadowAtom(mce,a1));
530   return assignNew('V', mce, Ity_I16, unop(Iop_Left16, a1));
531}
532
533static IRAtom* mkLeft32 ( MCEnv* mce, IRAtom* a1 ) {
534   tl_assert(isShadowAtom(mce,a1));
535   return assignNew('V', mce, Ity_I32, unop(Iop_Left32, a1));
536}
537
538static IRAtom* mkLeft64 ( MCEnv* mce, IRAtom* a1 ) {
539   tl_assert(isShadowAtom(mce,a1));
540   return assignNew('V', mce, Ity_I64, unop(Iop_Left64, a1));
541}
542
543/* --------- 'Improvement' functions for AND/OR. --------- */
544
545/* ImproveAND(data, vbits) = data OR vbits.  Defined (0) data 0s give
546   defined (0); all other -> undefined (1).
547*/
548static IRAtom* mkImproveAND8 ( MCEnv* mce, IRAtom* data, IRAtom* vbits )
549{
550   tl_assert(isOriginalAtom(mce, data));
551   tl_assert(isShadowAtom(mce, vbits));
552   tl_assert(sameKindedAtoms(data, vbits));
553   return assignNew('V', mce, Ity_I8, binop(Iop_Or8, data, vbits));
554}
555
556static IRAtom* mkImproveAND16 ( MCEnv* mce, IRAtom* data, IRAtom* vbits )
557{
558   tl_assert(isOriginalAtom(mce, data));
559   tl_assert(isShadowAtom(mce, vbits));
560   tl_assert(sameKindedAtoms(data, vbits));
561   return assignNew('V', mce, Ity_I16, binop(Iop_Or16, data, vbits));
562}
563
564static IRAtom* mkImproveAND32 ( MCEnv* mce, IRAtom* data, IRAtom* vbits )
565{
566   tl_assert(isOriginalAtom(mce, data));
567   tl_assert(isShadowAtom(mce, vbits));
568   tl_assert(sameKindedAtoms(data, vbits));
569   return assignNew('V', mce, Ity_I32, binop(Iop_Or32, data, vbits));
570}
571
572static IRAtom* mkImproveAND64 ( MCEnv* mce, IRAtom* data, IRAtom* vbits )
573{
574   tl_assert(isOriginalAtom(mce, data));
575   tl_assert(isShadowAtom(mce, vbits));
576   tl_assert(sameKindedAtoms(data, vbits));
577   return assignNew('V', mce, Ity_I64, binop(Iop_Or64, data, vbits));
578}
579
580static IRAtom* mkImproveANDV128 ( MCEnv* mce, IRAtom* data, IRAtom* vbits )
581{
582   tl_assert(isOriginalAtom(mce, data));
583   tl_assert(isShadowAtom(mce, vbits));
584   tl_assert(sameKindedAtoms(data, vbits));
585   return assignNew('V', mce, Ity_V128, binop(Iop_OrV128, data, vbits));
586}
587
588/* ImproveOR(data, vbits) = ~data OR vbits.  Defined (0) data 1s give
589   defined (0); all other -> undefined (1).
590*/
591static IRAtom* mkImproveOR8 ( MCEnv* mce, IRAtom* data, IRAtom* vbits )
592{
593   tl_assert(isOriginalAtom(mce, data));
594   tl_assert(isShadowAtom(mce, vbits));
595   tl_assert(sameKindedAtoms(data, vbits));
596   return assignNew(
597             'V', mce, Ity_I8,
598             binop(Iop_Or8,
599                   assignNew('V', mce, Ity_I8, unop(Iop_Not8, data)),
600                   vbits) );
601}
602
603static IRAtom* mkImproveOR16 ( MCEnv* mce, IRAtom* data, IRAtom* vbits )
604{
605   tl_assert(isOriginalAtom(mce, data));
606   tl_assert(isShadowAtom(mce, vbits));
607   tl_assert(sameKindedAtoms(data, vbits));
608   return assignNew(
609             'V', mce, Ity_I16,
610             binop(Iop_Or16,
611                   assignNew('V', mce, Ity_I16, unop(Iop_Not16, data)),
612                   vbits) );
613}
614
615static IRAtom* mkImproveOR32 ( MCEnv* mce, IRAtom* data, IRAtom* vbits )
616{
617   tl_assert(isOriginalAtom(mce, data));
618   tl_assert(isShadowAtom(mce, vbits));
619   tl_assert(sameKindedAtoms(data, vbits));
620   return assignNew(
621             'V', mce, Ity_I32,
622             binop(Iop_Or32,
623                   assignNew('V', mce, Ity_I32, unop(Iop_Not32, data)),
624                   vbits) );
625}
626
627static IRAtom* mkImproveOR64 ( MCEnv* mce, IRAtom* data, IRAtom* vbits )
628{
629   tl_assert(isOriginalAtom(mce, data));
630   tl_assert(isShadowAtom(mce, vbits));
631   tl_assert(sameKindedAtoms(data, vbits));
632   return assignNew(
633             'V', mce, Ity_I64,
634             binop(Iop_Or64,
635                   assignNew('V', mce, Ity_I64, unop(Iop_Not64, data)),
636                   vbits) );
637}
638
639static IRAtom* mkImproveORV128 ( MCEnv* mce, IRAtom* data, IRAtom* vbits )
640{
641   tl_assert(isOriginalAtom(mce, data));
642   tl_assert(isShadowAtom(mce, vbits));
643   tl_assert(sameKindedAtoms(data, vbits));
644   return assignNew(
645             'V', mce, Ity_V128,
646             binop(Iop_OrV128,
647                   assignNew('V', mce, Ity_V128, unop(Iop_NotV128, data)),
648                   vbits) );
649}
650
651/* --------- Pessimising casts. --------- */
652
653static IRAtom* mkPCastTo( MCEnv* mce, IRType dst_ty, IRAtom* vbits )
654{
655   IRType  src_ty;
656   IRAtom* tmp1;
657   /* Note, dst_ty is a shadow type, not an original type. */
658   /* First of all, collapse vbits down to a single bit. */
659   tl_assert(isShadowAtom(mce,vbits));
660   src_ty = typeOfIRExpr(mce->sb->tyenv, vbits);
661
662   /* Fast-track some common cases */
663   if (src_ty == Ity_I32 && dst_ty == Ity_I32)
664      return assignNew('V', mce, Ity_I32, unop(Iop_CmpwNEZ32, vbits));
665
666   if (src_ty == Ity_I64 && dst_ty == Ity_I64)
667      return assignNew('V', mce, Ity_I64, unop(Iop_CmpwNEZ64, vbits));
668
669   if (src_ty == Ity_I32 && dst_ty == Ity_I64) {
670      IRAtom* tmp = assignNew('V', mce, Ity_I32, unop(Iop_CmpwNEZ32, vbits));
671      return assignNew('V', mce, Ity_I64, binop(Iop_32HLto64, tmp, tmp));
672   }
673
674   /* Else do it the slow way .. */
675   tmp1   = NULL;
676   switch (src_ty) {
677      case Ity_I1:
678         tmp1 = vbits;
679         break;
680      case Ity_I8:
681         tmp1 = assignNew('V', mce, Ity_I1, unop(Iop_CmpNEZ8, vbits));
682         break;
683      case Ity_I16:
684         tmp1 = assignNew('V', mce, Ity_I1, unop(Iop_CmpNEZ16, vbits));
685         break;
686      case Ity_I32:
687         tmp1 = assignNew('V', mce, Ity_I1, unop(Iop_CmpNEZ32, vbits));
688         break;
689      case Ity_I64:
690         tmp1 = assignNew('V', mce, Ity_I1, unop(Iop_CmpNEZ64, vbits));
691         break;
692      case Ity_I128: {
693         /* Gah.  Chop it in half, OR the halves together, and compare
694            that with zero. */
695         IRAtom* tmp2 = assignNew('V', mce, Ity_I64, unop(Iop_128HIto64, vbits));
696         IRAtom* tmp3 = assignNew('V', mce, Ity_I64, unop(Iop_128to64, vbits));
697         IRAtom* tmp4 = assignNew('V', mce, Ity_I64, binop(Iop_Or64, tmp2, tmp3));
698         tmp1         = assignNew('V', mce, Ity_I1,
699                                       unop(Iop_CmpNEZ64, tmp4));
700         break;
701      }
702      default:
703         ppIRType(src_ty);
704         VG_(tool_panic)("mkPCastTo(1)");
705   }
706   tl_assert(tmp1);
707   /* Now widen up to the dst type. */
708   switch (dst_ty) {
709      case Ity_I1:
710         return tmp1;
711      case Ity_I8:
712         return assignNew('V', mce, Ity_I8, unop(Iop_1Sto8, tmp1));
713      case Ity_I16:
714         return assignNew('V', mce, Ity_I16, unop(Iop_1Sto16, tmp1));
715      case Ity_I32:
716         return assignNew('V', mce, Ity_I32, unop(Iop_1Sto32, tmp1));
717      case Ity_I64:
718         return assignNew('V', mce, Ity_I64, unop(Iop_1Sto64, tmp1));
719      case Ity_V128:
720         tmp1 = assignNew('V', mce, Ity_I64,  unop(Iop_1Sto64, tmp1));
721         tmp1 = assignNew('V', mce, Ity_V128, binop(Iop_64HLtoV128, tmp1, tmp1));
722         return tmp1;
723      case Ity_I128:
724         tmp1 = assignNew('V', mce, Ity_I64,  unop(Iop_1Sto64, tmp1));
725         tmp1 = assignNew('V', mce, Ity_I128, binop(Iop_64HLto128, tmp1, tmp1));
726         return tmp1;
727      default:
728         ppIRType(dst_ty);
729         VG_(tool_panic)("mkPCastTo(2)");
730   }
731}
732
733/* --------- Accurate interpretation of CmpEQ/CmpNE. --------- */
734/*
735   Normally, we can do CmpEQ/CmpNE by doing UifU on the arguments, and
736   PCasting to Ity_U1.  However, sometimes it is necessary to be more
737   accurate.  The insight is that the result is defined if two
738   corresponding bits can be found, one from each argument, so that
739   both bits are defined but are different -- that makes EQ say "No"
740   and NE say "Yes".  Hence, we compute an improvement term and DifD
741   it onto the "normal" (UifU) result.
742
743   The result is:
744
745   PCastTo<1> (
746      -- naive version
747      PCastTo<sz>( UifU<sz>(vxx, vyy) )
748
749      `DifD<sz>`
750
751      -- improvement term
752      PCastTo<sz>( PCast<sz>( CmpEQ<sz> ( vec, 1...1 ) ) )
753   )
754
755   where
756     vec contains 0 (defined) bits where the corresponding arg bits
757     are defined but different, and 1 bits otherwise.
758
759     vec = Or<sz>( vxx,   // 0 iff bit defined
760                   vyy,   // 0 iff bit defined
761                   Not<sz>(Xor<sz>( xx, yy )) // 0 iff bits different
762                 )
763
764     If any bit of vec is 0, the result is defined and so the
765     improvement term should produce 0...0, else it should produce
766     1...1.
767
768     Hence require for the improvement term:
769
770        if vec == 1...1 then 1...1 else 0...0
771     ->
772        PCast<sz>( CmpEQ<sz> ( vec, 1...1 ) )
773
774   This was extensively re-analysed and checked on 6 July 05.
775*/
776static IRAtom* expensiveCmpEQorNE ( MCEnv*  mce,
777                                    IRType  ty,
778                                    IRAtom* vxx, IRAtom* vyy,
779                                    IRAtom* xx,  IRAtom* yy )
780{
781   IRAtom *naive, *vec, *improvement_term;
782   IRAtom *improved, *final_cast, *top;
783   IROp   opDIFD, opUIFU, opXOR, opNOT, opCMP, opOR;
784
785   tl_assert(isShadowAtom(mce,vxx));
786   tl_assert(isShadowAtom(mce,vyy));
787   tl_assert(isOriginalAtom(mce,xx));
788   tl_assert(isOriginalAtom(mce,yy));
789   tl_assert(sameKindedAtoms(vxx,xx));
790   tl_assert(sameKindedAtoms(vyy,yy));
791
792   switch (ty) {
793      case Ity_I32:
794         opOR   = Iop_Or32;
795         opDIFD = Iop_And32;
796         opUIFU = Iop_Or32;
797         opNOT  = Iop_Not32;
798         opXOR  = Iop_Xor32;
799         opCMP  = Iop_CmpEQ32;
800         top    = mkU32(0xFFFFFFFF);
801         break;
802      case Ity_I64:
803         opOR   = Iop_Or64;
804         opDIFD = Iop_And64;
805         opUIFU = Iop_Or64;
806         opNOT  = Iop_Not64;
807         opXOR  = Iop_Xor64;
808         opCMP  = Iop_CmpEQ64;
809         top    = mkU64(0xFFFFFFFFFFFFFFFFULL);
810         break;
811      default:
812         VG_(tool_panic)("expensiveCmpEQorNE");
813   }
814
815   naive
816      = mkPCastTo(mce,ty,
817                  assignNew('V', mce, ty, binop(opUIFU, vxx, vyy)));
818
819   vec
820      = assignNew(
821           'V', mce,ty,
822           binop( opOR,
823                  assignNew('V', mce,ty, binop(opOR, vxx, vyy)),
824                  assignNew(
825                     'V', mce,ty,
826                     unop( opNOT,
827                           assignNew('V', mce,ty, binop(opXOR, xx, yy))))));
828
829   improvement_term
830      = mkPCastTo( mce,ty,
831                   assignNew('V', mce,Ity_I1, binop(opCMP, vec, top)));
832
833   improved
834      = assignNew( 'V', mce,ty, binop(opDIFD, naive, improvement_term) );
835
836   final_cast
837      = mkPCastTo( mce, Ity_I1, improved );
838
839   return final_cast;
840}
841
842
843/* --------- Semi-accurate interpretation of CmpORD. --------- */
844
845/* CmpORD32{S,U} does PowerPC-style 3-way comparisons:
846
847      CmpORD32S(x,y) = 1<<3   if  x <s y
848                     = 1<<2   if  x >s y
849                     = 1<<1   if  x == y
850
851   and similarly the unsigned variant.  The default interpretation is:
852
853      CmpORD32{S,U}#(x,y,x#,y#) = PCast(x# `UifU` y#)
854                                  & (7<<1)
855
856   The "& (7<<1)" reflects the fact that all result bits except 3,2,1
857   are zero and therefore defined (viz, zero).
858
859   Also deal with a special case better:
860
861      CmpORD32S(x,0)
862
863   Here, bit 3 (LT) of the result is a copy of the top bit of x and
864   will be defined even if the rest of x isn't.  In which case we do:
865
866      CmpORD32S#(x,x#,0,{impliedly 0}#)
867         = PCast(x#) & (3<<1)      -- standard interp for GT#,EQ#
868           | (x# >>u 31) << 3      -- LT# = x#[31]
869
870   Analogous handling for CmpORD64{S,U}.
871*/
872static Bool isZeroU32 ( IRAtom* e )
873{
874   return
875      toBool( e->tag == Iex_Const
876              && e->Iex.Const.con->tag == Ico_U32
877              && e->Iex.Const.con->Ico.U32 == 0 );
878}
879
880static Bool isZeroU64 ( IRAtom* e )
881{
882   return
883      toBool( e->tag == Iex_Const
884              && e->Iex.Const.con->tag == Ico_U64
885              && e->Iex.Const.con->Ico.U64 == 0 );
886}
887
888static IRAtom* doCmpORD ( MCEnv*  mce,
889                          IROp    cmp_op,
890                          IRAtom* xxhash, IRAtom* yyhash,
891                          IRAtom* xx,     IRAtom* yy )
892{
893   Bool   m64    = cmp_op == Iop_CmpORD64S || cmp_op == Iop_CmpORD64U;
894   Bool   syned  = cmp_op == Iop_CmpORD64S || cmp_op == Iop_CmpORD32S;
895   IROp   opOR   = m64 ? Iop_Or64  : Iop_Or32;
896   IROp   opAND  = m64 ? Iop_And64 : Iop_And32;
897   IROp   opSHL  = m64 ? Iop_Shl64 : Iop_Shl32;
898   IROp   opSHR  = m64 ? Iop_Shr64 : Iop_Shr32;
899   IRType ty     = m64 ? Ity_I64   : Ity_I32;
900   Int    width  = m64 ? 64        : 32;
901
902   Bool (*isZero)(IRAtom*) = m64 ? isZeroU64 : isZeroU32;
903
904   IRAtom* threeLeft1 = NULL;
905   IRAtom* sevenLeft1 = NULL;
906
907   tl_assert(isShadowAtom(mce,xxhash));
908   tl_assert(isShadowAtom(mce,yyhash));
909   tl_assert(isOriginalAtom(mce,xx));
910   tl_assert(isOriginalAtom(mce,yy));
911   tl_assert(sameKindedAtoms(xxhash,xx));
912   tl_assert(sameKindedAtoms(yyhash,yy));
913   tl_assert(cmp_op == Iop_CmpORD32S || cmp_op == Iop_CmpORD32U
914             || cmp_op == Iop_CmpORD64S || cmp_op == Iop_CmpORD64U);
915
916   if (0) {
917      ppIROp(cmp_op); VG_(printf)(" ");
918      ppIRExpr(xx); VG_(printf)(" "); ppIRExpr( yy ); VG_(printf)("\n");
919   }
920
921   if (syned && isZero(yy)) {
922      /* fancy interpretation */
923      /* if yy is zero, then it must be fully defined (zero#). */
924      tl_assert(isZero(yyhash));
925      threeLeft1 = m64 ? mkU64(3<<1) : mkU32(3<<1);
926      return
927         binop(
928            opOR,
929            assignNew(
930               'V', mce,ty,
931               binop(
932                  opAND,
933                  mkPCastTo(mce,ty, xxhash),
934                  threeLeft1
935               )),
936            assignNew(
937               'V', mce,ty,
938               binop(
939                  opSHL,
940                  assignNew(
941                     'V', mce,ty,
942                     binop(opSHR, xxhash, mkU8(width-1))),
943                  mkU8(3)
944               ))
945	 );
946   } else {
947      /* standard interpretation */
948      sevenLeft1 = m64 ? mkU64(7<<1) : mkU32(7<<1);
949      return
950         binop(
951            opAND,
952            mkPCastTo( mce,ty,
953                       mkUifU(mce,ty, xxhash,yyhash)),
954            sevenLeft1
955         );
956   }
957}
958
959
960/*------------------------------------------------------------*/
961/*--- Emit a test and complaint if something is undefined. ---*/
962/*------------------------------------------------------------*/
963
964static IRAtom* schemeE ( MCEnv* mce, IRExpr* e ); /* fwds */
965
966
967/* Set the annotations on a dirty helper to indicate that the stack
968   pointer and instruction pointers might be read.  This is the
969   behaviour of all 'emit-a-complaint' style functions we might
970   call. */
971
972static void setHelperAnns ( MCEnv* mce, IRDirty* di ) {
973   di->nFxState = 2;
974   di->fxState[0].fx     = Ifx_Read;
975   di->fxState[0].offset = mce->layout->offset_SP;
976   di->fxState[0].size   = mce->layout->sizeof_SP;
977   di->fxState[1].fx     = Ifx_Read;
978   di->fxState[1].offset = mce->layout->offset_IP;
979   di->fxState[1].size   = mce->layout->sizeof_IP;
980}
981
982
983/* Check the supplied **original** atom for undefinedness, and emit a
984   complaint if so.  Once that happens, mark it as defined.  This is
985   possible because the atom is either a tmp or literal.  If it's a
986   tmp, it will be shadowed by a tmp, and so we can set the shadow to
987   be defined.  In fact as mentioned above, we will have to allocate a
988   new tmp to carry the new 'defined' shadow value, and update the
989   original->tmp mapping accordingly; we cannot simply assign a new
990   value to an existing shadow tmp as this breaks SSAness -- resulting
991   in the post-instrumentation sanity checker spluttering in disapproval.
992*/
993static void complainIfUndefined ( MCEnv* mce, IRAtom* atom )
994{
995   IRAtom*  vatom;
996   IRType   ty;
997   Int      sz;
998   IRDirty* di;
999   IRAtom*  cond;
1000   IRAtom*  origin;
1001   void*    fn;
1002   HChar*   nm;
1003   IRExpr** args;
1004   Int      nargs;
1005
1006   // Don't do V bit tests if we're not reporting undefined value errors.
1007   if (MC_(clo_mc_level) == 1)
1008      return;
1009
1010   /* Since the original expression is atomic, there's no duplicated
1011      work generated by making multiple V-expressions for it.  So we
1012      don't really care about the possibility that someone else may
1013      also create a V-interpretion for it. */
1014   tl_assert(isOriginalAtom(mce, atom));
1015   vatom = expr2vbits( mce, atom );
1016   tl_assert(isShadowAtom(mce, vatom));
1017   tl_assert(sameKindedAtoms(atom, vatom));
1018
1019   ty = typeOfIRExpr(mce->sb->tyenv, vatom);
1020
1021   /* sz is only used for constructing the error message */
1022   sz = ty==Ity_I1 ? 0 : sizeofIRType(ty);
1023
1024   cond = mkPCastTo( mce, Ity_I1, vatom );
1025   /* cond will be 0 if all defined, and 1 if any not defined. */
1026
1027   /* Get the origin info for the value we are about to check.  At
1028      least, if we are doing origin tracking.  If not, use a dummy
1029      zero origin. */
1030   if (MC_(clo_mc_level) == 3) {
1031      origin = schemeE( mce, atom );
1032      if (mce->hWordTy == Ity_I64) {
1033         origin = assignNew( 'B', mce, Ity_I64, unop(Iop_32Uto64, origin) );
1034      }
1035   } else {
1036      origin = NULL;
1037   }
1038
1039   fn    = NULL;
1040   nm    = NULL;
1041   args  = NULL;
1042   nargs = -1;
1043
1044   switch (sz) {
1045      case 0:
1046         if (origin) {
1047            fn    = &MC_(helperc_value_check0_fail_w_o);
1048            nm    = "MC_(helperc_value_check0_fail_w_o)";
1049            args  = mkIRExprVec_1(origin);
1050            nargs = 1;
1051         } else {
1052            fn    = &MC_(helperc_value_check0_fail_no_o);
1053            nm    = "MC_(helperc_value_check0_fail_no_o)";
1054            args  = mkIRExprVec_0();
1055            nargs = 0;
1056         }
1057         break;
1058      case 1:
1059         if (origin) {
1060            fn    = &MC_(helperc_value_check1_fail_w_o);
1061            nm    = "MC_(helperc_value_check1_fail_w_o)";
1062            args  = mkIRExprVec_1(origin);
1063            nargs = 1;
1064         } else {
1065            fn    = &MC_(helperc_value_check1_fail_no_o);
1066            nm    = "MC_(helperc_value_check1_fail_no_o)";
1067            args  = mkIRExprVec_0();
1068            nargs = 0;
1069         }
1070         break;
1071      case 4:
1072         if (origin) {
1073            fn    = &MC_(helperc_value_check4_fail_w_o);
1074            nm    = "MC_(helperc_value_check4_fail_w_o)";
1075            args  = mkIRExprVec_1(origin);
1076            nargs = 1;
1077         } else {
1078            fn    = &MC_(helperc_value_check4_fail_no_o);
1079            nm    = "MC_(helperc_value_check4_fail_no_o)";
1080            args  = mkIRExprVec_0();
1081            nargs = 0;
1082         }
1083         break;
1084      case 8:
1085         if (origin) {
1086            fn    = &MC_(helperc_value_check8_fail_w_o);
1087            nm    = "MC_(helperc_value_check8_fail_w_o)";
1088            args  = mkIRExprVec_1(origin);
1089            nargs = 1;
1090         } else {
1091            fn    = &MC_(helperc_value_check8_fail_no_o);
1092            nm    = "MC_(helperc_value_check8_fail_no_o)";
1093            args  = mkIRExprVec_0();
1094            nargs = 0;
1095         }
1096         break;
1097      case 2:
1098      case 16:
1099         if (origin) {
1100            fn    = &MC_(helperc_value_checkN_fail_w_o);
1101            nm    = "MC_(helperc_value_checkN_fail_w_o)";
1102            args  = mkIRExprVec_2( mkIRExpr_HWord( sz ), origin);
1103            nargs = 2;
1104         } else {
1105            fn    = &MC_(helperc_value_checkN_fail_no_o);
1106            nm    = "MC_(helperc_value_checkN_fail_no_o)";
1107            args  = mkIRExprVec_1( mkIRExpr_HWord( sz ) );
1108            nargs = 1;
1109         }
1110         break;
1111      default:
1112         VG_(tool_panic)("unexpected szB");
1113   }
1114
1115   tl_assert(fn);
1116   tl_assert(nm);
1117   tl_assert(args);
1118   tl_assert(nargs >= 0 && nargs <= 2);
1119   tl_assert( (MC_(clo_mc_level) == 3 && origin != NULL)
1120              || (MC_(clo_mc_level) == 2 && origin == NULL) );
1121
1122   di = unsafeIRDirty_0_N( nargs/*regparms*/, nm,
1123                           VG_(fnptr_to_fnentry)( fn ), args );
1124   di->guard = cond;
1125   setHelperAnns( mce, di );
1126   stmt( 'V', mce, IRStmt_Dirty(di));
1127
1128   /* Set the shadow tmp to be defined.  First, update the
1129      orig->shadow tmp mapping to reflect the fact that this shadow is
1130      getting a new value. */
1131   tl_assert(isIRAtom(vatom));
1132   /* sameKindedAtoms ... */
1133   if (vatom->tag == Iex_RdTmp) {
1134      tl_assert(atom->tag == Iex_RdTmp);
1135      newShadowTmpV(mce, atom->Iex.RdTmp.tmp);
1136      assign('V', mce, findShadowTmpV(mce, atom->Iex.RdTmp.tmp),
1137                       definedOfType(ty));
1138   }
1139}
1140
1141
1142/*------------------------------------------------------------*/
1143/*--- Shadowing PUTs/GETs, and indexed variants thereof    ---*/
1144/*------------------------------------------------------------*/
1145
1146/* Examine the always-defined sections declared in layout to see if
1147   the (offset,size) section is within one.  Note, is is an error to
1148   partially fall into such a region: (offset,size) should either be
1149   completely in such a region or completely not-in such a region.
1150*/
1151static Bool isAlwaysDefd ( MCEnv* mce, Int offset, Int size )
1152{
1153   Int minoffD, maxoffD, i;
1154   Int minoff = offset;
1155   Int maxoff = minoff + size - 1;
1156   tl_assert((minoff & ~0xFFFF) == 0);
1157   tl_assert((maxoff & ~0xFFFF) == 0);
1158
1159   for (i = 0; i < mce->layout->n_alwaysDefd; i++) {
1160      minoffD = mce->layout->alwaysDefd[i].offset;
1161      maxoffD = minoffD + mce->layout->alwaysDefd[i].size - 1;
1162      tl_assert((minoffD & ~0xFFFF) == 0);
1163      tl_assert((maxoffD & ~0xFFFF) == 0);
1164
1165      if (maxoff < minoffD || maxoffD < minoff)
1166         continue; /* no overlap */
1167      if (minoff >= minoffD && maxoff <= maxoffD)
1168         return True; /* completely contained in an always-defd section */
1169
1170      VG_(tool_panic)("memcheck:isAlwaysDefd:partial overlap");
1171   }
1172   return False; /* could not find any containing section */
1173}
1174
1175
1176/* Generate into bb suitable actions to shadow this Put.  If the state
1177   slice is marked 'always defined', do nothing.  Otherwise, write the
1178   supplied V bits to the shadow state.  We can pass in either an
1179   original atom or a V-atom, but not both.  In the former case the
1180   relevant V-bits are then generated from the original.
1181*/
1182static
1183void do_shadow_PUT ( MCEnv* mce,  Int offset,
1184                     IRAtom* atom, IRAtom* vatom )
1185{
1186   IRType ty;
1187
1188   // Don't do shadow PUTs if we're not doing undefined value checking.
1189   // Their absence lets Vex's optimiser remove all the shadow computation
1190   // that they depend on, which includes GETs of the shadow registers.
1191   if (MC_(clo_mc_level) == 1)
1192      return;
1193
1194   if (atom) {
1195      tl_assert(!vatom);
1196      tl_assert(isOriginalAtom(mce, atom));
1197      vatom = expr2vbits( mce, atom );
1198   } else {
1199      tl_assert(vatom);
1200      tl_assert(isShadowAtom(mce, vatom));
1201   }
1202
1203   ty = typeOfIRExpr(mce->sb->tyenv, vatom);
1204   tl_assert(ty != Ity_I1);
1205   if (isAlwaysDefd(mce, offset, sizeofIRType(ty))) {
1206      /* later: no ... */
1207      /* emit code to emit a complaint if any of the vbits are 1. */
1208      /* complainIfUndefined(mce, atom); */
1209   } else {
1210      /* Do a plain shadow Put. */
1211      stmt( 'V', mce, IRStmt_Put( offset + mce->layout->total_sizeB, vatom ) );
1212   }
1213}
1214
1215
1216/* Return an expression which contains the V bits corresponding to the
1217   given GETI (passed in in pieces).
1218*/
1219static
1220void do_shadow_PUTI ( MCEnv* mce,
1221                      IRRegArray* descr,
1222                      IRAtom* ix, Int bias, IRAtom* atom )
1223{
1224   IRAtom* vatom;
1225   IRType  ty, tyS;
1226   Int     arrSize;;
1227
1228   // Don't do shadow PUTIs if we're not doing undefined value checking.
1229   // Their absence lets Vex's optimiser remove all the shadow computation
1230   // that they depend on, which includes GETIs of the shadow registers.
1231   if (MC_(clo_mc_level) == 1)
1232      return;
1233
1234   tl_assert(isOriginalAtom(mce,atom));
1235   vatom = expr2vbits( mce, atom );
1236   tl_assert(sameKindedAtoms(atom, vatom));
1237   ty   = descr->elemTy;
1238   tyS  = shadowTypeV(ty);
1239   arrSize = descr->nElems * sizeofIRType(ty);
1240   tl_assert(ty != Ity_I1);
1241   tl_assert(isOriginalAtom(mce,ix));
1242   complainIfUndefined(mce,ix);
1243   if (isAlwaysDefd(mce, descr->base, arrSize)) {
1244      /* later: no ... */
1245      /* emit code to emit a complaint if any of the vbits are 1. */
1246      /* complainIfUndefined(mce, atom); */
1247   } else {
1248      /* Do a cloned version of the Put that refers to the shadow
1249         area. */
1250      IRRegArray* new_descr
1251         = mkIRRegArray( descr->base + mce->layout->total_sizeB,
1252                         tyS, descr->nElems);
1253      stmt( 'V', mce, IRStmt_PutI( new_descr, ix, bias, vatom ));
1254   }
1255}
1256
1257
1258/* Return an expression which contains the V bits corresponding to the
1259   given GET (passed in in pieces).
1260*/
1261static
1262IRExpr* shadow_GET ( MCEnv* mce, Int offset, IRType ty )
1263{
1264   IRType tyS = shadowTypeV(ty);
1265   tl_assert(ty != Ity_I1);
1266   if (isAlwaysDefd(mce, offset, sizeofIRType(ty))) {
1267      /* Always defined, return all zeroes of the relevant type */
1268      return definedOfType(tyS);
1269   } else {
1270      /* return a cloned version of the Get that refers to the shadow
1271         area. */
1272      /* FIXME: this isn't an atom! */
1273      return IRExpr_Get( offset + mce->layout->total_sizeB, tyS );
1274   }
1275}
1276
1277
1278/* Return an expression which contains the V bits corresponding to the
1279   given GETI (passed in in pieces).
1280*/
1281static
1282IRExpr* shadow_GETI ( MCEnv* mce,
1283                      IRRegArray* descr, IRAtom* ix, Int bias )
1284{
1285   IRType ty   = descr->elemTy;
1286   IRType tyS  = shadowTypeV(ty);
1287   Int arrSize = descr->nElems * sizeofIRType(ty);
1288   tl_assert(ty != Ity_I1);
1289   tl_assert(isOriginalAtom(mce,ix));
1290   complainIfUndefined(mce,ix);
1291   if (isAlwaysDefd(mce, descr->base, arrSize)) {
1292      /* Always defined, return all zeroes of the relevant type */
1293      return definedOfType(tyS);
1294   } else {
1295      /* return a cloned version of the Get that refers to the shadow
1296         area. */
1297      IRRegArray* new_descr
1298         = mkIRRegArray( descr->base + mce->layout->total_sizeB,
1299                         tyS, descr->nElems);
1300      return IRExpr_GetI( new_descr, ix, bias );
1301   }
1302}
1303
1304
1305/*------------------------------------------------------------*/
1306/*--- Generating approximations for unknown operations,    ---*/
1307/*--- using lazy-propagate semantics                       ---*/
1308/*------------------------------------------------------------*/
1309
1310/* Lazy propagation of undefinedness from two values, resulting in the
1311   specified shadow type.
1312*/
1313static
1314IRAtom* mkLazy2 ( MCEnv* mce, IRType finalVty, IRAtom* va1, IRAtom* va2 )
1315{
1316   IRAtom* at;
1317   IRType t1 = typeOfIRExpr(mce->sb->tyenv, va1);
1318   IRType t2 = typeOfIRExpr(mce->sb->tyenv, va2);
1319   tl_assert(isShadowAtom(mce,va1));
1320   tl_assert(isShadowAtom(mce,va2));
1321
1322   /* The general case is inefficient because PCast is an expensive
1323      operation.  Here are some special cases which use PCast only
1324      once rather than twice. */
1325
1326   /* I64 x I64 -> I64 */
1327   if (t1 == Ity_I64 && t2 == Ity_I64 && finalVty == Ity_I64) {
1328      if (0) VG_(printf)("mkLazy2: I64 x I64 -> I64\n");
1329      at = mkUifU(mce, Ity_I64, va1, va2);
1330      at = mkPCastTo(mce, Ity_I64, at);
1331      return at;
1332   }
1333
1334   /* I64 x I64 -> I32 */
1335   if (t1 == Ity_I64 && t2 == Ity_I64 && finalVty == Ity_I32) {
1336      if (0) VG_(printf)("mkLazy2: I64 x I64 -> I32\n");
1337      at = mkUifU(mce, Ity_I64, va1, va2);
1338      at = mkPCastTo(mce, Ity_I32, at);
1339      return at;
1340   }
1341
1342   if (0) {
1343      VG_(printf)("mkLazy2 ");
1344      ppIRType(t1);
1345      VG_(printf)("_");
1346      ppIRType(t2);
1347      VG_(printf)("_");
1348      ppIRType(finalVty);
1349      VG_(printf)("\n");
1350   }
1351
1352   /* General case: force everything via 32-bit intermediaries. */
1353   at = mkPCastTo(mce, Ity_I32, va1);
1354   at = mkUifU(mce, Ity_I32, at, mkPCastTo(mce, Ity_I32, va2));
1355   at = mkPCastTo(mce, finalVty, at);
1356   return at;
1357}
1358
1359
1360/* 3-arg version of the above. */
1361static
1362IRAtom* mkLazy3 ( MCEnv* mce, IRType finalVty,
1363                  IRAtom* va1, IRAtom* va2, IRAtom* va3 )
1364{
1365   IRAtom* at;
1366   IRType t1 = typeOfIRExpr(mce->sb->tyenv, va1);
1367   IRType t2 = typeOfIRExpr(mce->sb->tyenv, va2);
1368   IRType t3 = typeOfIRExpr(mce->sb->tyenv, va3);
1369   tl_assert(isShadowAtom(mce,va1));
1370   tl_assert(isShadowAtom(mce,va2));
1371   tl_assert(isShadowAtom(mce,va3));
1372
1373   /* The general case is inefficient because PCast is an expensive
1374      operation.  Here are some special cases which use PCast only
1375      twice rather than three times. */
1376
1377   /* I32 x I64 x I64 -> I64 */
1378   /* Standard FP idiom: rm x FParg1 x FParg2 -> FPresult */
1379   if (t1 == Ity_I32 && t2 == Ity_I64 && t3 == Ity_I64
1380       && finalVty == Ity_I64) {
1381      if (0) VG_(printf)("mkLazy3: I32 x I64 x I64 -> I64\n");
1382      /* Widen 1st arg to I64.  Since 1st arg is typically a rounding
1383         mode indication which is fully defined, this should get
1384         folded out later. */
1385      at = mkPCastTo(mce, Ity_I64, va1);
1386      /* Now fold in 2nd and 3rd args. */
1387      at = mkUifU(mce, Ity_I64, at, va2);
1388      at = mkUifU(mce, Ity_I64, at, va3);
1389      /* and PCast once again. */
1390      at = mkPCastTo(mce, Ity_I64, at);
1391      return at;
1392   }
1393
1394   /* I32 x I64 x I64 -> I32 */
1395   if (t1 == Ity_I32 && t2 == Ity_I64 && t3 == Ity_I64
1396       && finalVty == Ity_I32) {
1397      if (0) VG_(printf)("mkLazy3: I32 x I64 x I64 -> I32\n");
1398      at = mkPCastTo(mce, Ity_I64, va1);
1399      at = mkUifU(mce, Ity_I64, at, va2);
1400      at = mkUifU(mce, Ity_I64, at, va3);
1401      at = mkPCastTo(mce, Ity_I32, at);
1402      return at;
1403   }
1404
1405   /* I32 x I32 x I32 -> I32 */
1406   /* 32-bit FP idiom, as (eg) happens on ARM */
1407   if (t1 == Ity_I32 && t2 == Ity_I32 && t3 == Ity_I32
1408       && finalVty == Ity_I32) {
1409      if (0) VG_(printf)("mkLazy3: I32 x I32 x I32 -> I32\n");
1410      at = va1;
1411      at = mkUifU(mce, Ity_I32, at, va2);
1412      at = mkUifU(mce, Ity_I32, at, va3);
1413      at = mkPCastTo(mce, Ity_I32, at);
1414      return at;
1415   }
1416
1417   if (1) {
1418      VG_(printf)("mkLazy3: ");
1419      ppIRType(t1);
1420      VG_(printf)(" x ");
1421      ppIRType(t2);
1422      VG_(printf)(" x ");
1423      ppIRType(t3);
1424      VG_(printf)(" -> ");
1425      ppIRType(finalVty);
1426      VG_(printf)("\n");
1427   }
1428
1429   tl_assert(0);
1430   /* General case: force everything via 32-bit intermediaries. */
1431   /*
1432   at = mkPCastTo(mce, Ity_I32, va1);
1433   at = mkUifU(mce, Ity_I32, at, mkPCastTo(mce, Ity_I32, va2));
1434   at = mkUifU(mce, Ity_I32, at, mkPCastTo(mce, Ity_I32, va3));
1435   at = mkPCastTo(mce, finalVty, at);
1436   return at;
1437   */
1438}
1439
1440
1441/* 4-arg version of the above. */
1442static
1443IRAtom* mkLazy4 ( MCEnv* mce, IRType finalVty,
1444                  IRAtom* va1, IRAtom* va2, IRAtom* va3, IRAtom* va4 )
1445{
1446   IRAtom* at;
1447   IRType t1 = typeOfIRExpr(mce->sb->tyenv, va1);
1448   IRType t2 = typeOfIRExpr(mce->sb->tyenv, va2);
1449   IRType t3 = typeOfIRExpr(mce->sb->tyenv, va3);
1450   IRType t4 = typeOfIRExpr(mce->sb->tyenv, va4);
1451   tl_assert(isShadowAtom(mce,va1));
1452   tl_assert(isShadowAtom(mce,va2));
1453   tl_assert(isShadowAtom(mce,va3));
1454   tl_assert(isShadowAtom(mce,va4));
1455
1456   /* The general case is inefficient because PCast is an expensive
1457      operation.  Here are some special cases which use PCast only
1458      twice rather than three times. */
1459
1460   /* I32 x I64 x I64 x I64 -> I64 */
1461   /* Standard FP idiom: rm x FParg1 x FParg2 x FParg3 -> FPresult */
1462   if (t1 == Ity_I32 && t2 == Ity_I64 && t3 == Ity_I64 && t4 == Ity_I64
1463       && finalVty == Ity_I64) {
1464      if (0) VG_(printf)("mkLazy4: I32 x I64 x I64 x I64 -> I64\n");
1465      /* Widen 1st arg to I64.  Since 1st arg is typically a rounding
1466         mode indication which is fully defined, this should get
1467         folded out later. */
1468      at = mkPCastTo(mce, Ity_I64, va1);
1469      /* Now fold in 2nd, 3rd, 4th args. */
1470      at = mkUifU(mce, Ity_I64, at, va2);
1471      at = mkUifU(mce, Ity_I64, at, va3);
1472      at = mkUifU(mce, Ity_I64, at, va4);
1473      /* and PCast once again. */
1474      at = mkPCastTo(mce, Ity_I64, at);
1475      return at;
1476   }
1477
1478   if (1) {
1479      VG_(printf)("mkLazy4: ");
1480      ppIRType(t1);
1481      VG_(printf)(" x ");
1482      ppIRType(t2);
1483      VG_(printf)(" x ");
1484      ppIRType(t3);
1485      VG_(printf)(" x ");
1486      ppIRType(t4);
1487      VG_(printf)(" -> ");
1488      ppIRType(finalVty);
1489      VG_(printf)("\n");
1490   }
1491
1492   tl_assert(0);
1493}
1494
1495
1496/* Do the lazy propagation game from a null-terminated vector of
1497   atoms.  This is presumably the arguments to a helper call, so the
1498   IRCallee info is also supplied in order that we can know which
1499   arguments should be ignored (via the .mcx_mask field).
1500*/
1501static
1502IRAtom* mkLazyN ( MCEnv* mce,
1503                  IRAtom** exprvec, IRType finalVtype, IRCallee* cee )
1504{
1505   Int     i;
1506   IRAtom* here;
1507   IRAtom* curr;
1508   IRType  mergeTy;
1509   IRType  mergeTy64 = True;
1510
1511   /* Decide on the type of the merge intermediary.  If all relevant
1512      args are I64, then it's I64.  In all other circumstances, use
1513      I32. */
1514   for (i = 0; exprvec[i]; i++) {
1515      tl_assert(i < 32);
1516      tl_assert(isOriginalAtom(mce, exprvec[i]));
1517      if (cee->mcx_mask & (1<<i))
1518         continue;
1519      if (typeOfIRExpr(mce->sb->tyenv, exprvec[i]) != Ity_I64)
1520         mergeTy64 = False;
1521   }
1522
1523   mergeTy = mergeTy64  ? Ity_I64  : Ity_I32;
1524   curr    = definedOfType(mergeTy);
1525
1526   for (i = 0; exprvec[i]; i++) {
1527      tl_assert(i < 32);
1528      tl_assert(isOriginalAtom(mce, exprvec[i]));
1529      /* Only take notice of this arg if the callee's mc-exclusion
1530         mask does not say it is to be excluded. */
1531      if (cee->mcx_mask & (1<<i)) {
1532         /* the arg is to be excluded from definedness checking.  Do
1533            nothing. */
1534         if (0) VG_(printf)("excluding %s(%d)\n", cee->name, i);
1535      } else {
1536         /* calculate the arg's definedness, and pessimistically merge
1537            it in. */
1538         here = mkPCastTo( mce, mergeTy, expr2vbits(mce, exprvec[i]) );
1539         curr = mergeTy64
1540                   ? mkUifU64(mce, here, curr)
1541                   : mkUifU32(mce, here, curr);
1542      }
1543   }
1544   return mkPCastTo(mce, finalVtype, curr );
1545}
1546
1547
1548/*------------------------------------------------------------*/
1549/*--- Generating expensive sequences for exact carry-chain ---*/
1550/*--- propagation in add/sub and related operations.       ---*/
1551/*------------------------------------------------------------*/
1552
1553static
1554IRAtom* expensiveAddSub ( MCEnv*  mce,
1555                          Bool    add,
1556                          IRType  ty,
1557                          IRAtom* qaa, IRAtom* qbb,
1558                          IRAtom* aa,  IRAtom* bb )
1559{
1560   IRAtom *a_min, *b_min, *a_max, *b_max;
1561   IROp   opAND, opOR, opXOR, opNOT, opADD, opSUB;
1562
1563   tl_assert(isShadowAtom(mce,qaa));
1564   tl_assert(isShadowAtom(mce,qbb));
1565   tl_assert(isOriginalAtom(mce,aa));
1566   tl_assert(isOriginalAtom(mce,bb));
1567   tl_assert(sameKindedAtoms(qaa,aa));
1568   tl_assert(sameKindedAtoms(qbb,bb));
1569
1570   switch (ty) {
1571      case Ity_I32:
1572         opAND = Iop_And32;
1573         opOR  = Iop_Or32;
1574         opXOR = Iop_Xor32;
1575         opNOT = Iop_Not32;
1576         opADD = Iop_Add32;
1577         opSUB = Iop_Sub32;
1578         break;
1579      case Ity_I64:
1580         opAND = Iop_And64;
1581         opOR  = Iop_Or64;
1582         opXOR = Iop_Xor64;
1583         opNOT = Iop_Not64;
1584         opADD = Iop_Add64;
1585         opSUB = Iop_Sub64;
1586         break;
1587      default:
1588         VG_(tool_panic)("expensiveAddSub");
1589   }
1590
1591   // a_min = aa & ~qaa
1592   a_min = assignNew('V', mce,ty,
1593                     binop(opAND, aa,
1594                                  assignNew('V', mce,ty, unop(opNOT, qaa))));
1595
1596   // b_min = bb & ~qbb
1597   b_min = assignNew('V', mce,ty,
1598                     binop(opAND, bb,
1599                                  assignNew('V', mce,ty, unop(opNOT, qbb))));
1600
1601   // a_max = aa | qaa
1602   a_max = assignNew('V', mce,ty, binop(opOR, aa, qaa));
1603
1604   // b_max = bb | qbb
1605   b_max = assignNew('V', mce,ty, binop(opOR, bb, qbb));
1606
1607   if (add) {
1608      // result = (qaa | qbb) | ((a_min + b_min) ^ (a_max + b_max))
1609      return
1610      assignNew('V', mce,ty,
1611         binop( opOR,
1612                assignNew('V', mce,ty, binop(opOR, qaa, qbb)),
1613                assignNew('V', mce,ty,
1614                   binop( opXOR,
1615                          assignNew('V', mce,ty, binop(opADD, a_min, b_min)),
1616                          assignNew('V', mce,ty, binop(opADD, a_max, b_max))
1617                   )
1618                )
1619         )
1620      );
1621   } else {
1622      // result = (qaa | qbb) | ((a_min - b_max) ^ (a_max + b_min))
1623      return
1624      assignNew('V', mce,ty,
1625         binop( opOR,
1626                assignNew('V', mce,ty, binop(opOR, qaa, qbb)),
1627                assignNew('V', mce,ty,
1628                   binop( opXOR,
1629                          assignNew('V', mce,ty, binop(opSUB, a_min, b_max)),
1630                          assignNew('V', mce,ty, binop(opSUB, a_max, b_min))
1631                   )
1632                )
1633         )
1634      );
1635   }
1636
1637}
1638
1639
1640/*------------------------------------------------------------*/
1641/*--- Scalar shifts.                                       ---*/
1642/*------------------------------------------------------------*/
1643
1644/* Produce an interpretation for (aa << bb) (or >>s, >>u).  The basic
1645   idea is to shift the definedness bits by the original shift amount.
1646   This introduces 0s ("defined") in new positions for left shifts and
1647   unsigned right shifts, and copies the top definedness bit for
1648   signed right shifts.  So, conveniently, applying the original shift
1649   operator to the definedness bits for the left arg is exactly the
1650   right thing to do:
1651
1652      (qaa << bb)
1653
1654   However if the shift amount is undefined then the whole result
1655   is undefined.  Hence need:
1656
1657      (qaa << bb) `UifU` PCast(qbb)
1658
1659   If the shift amount bb is a literal than qbb will say 'all defined'
1660   and the UifU and PCast will get folded out by post-instrumentation
1661   optimisation.
1662*/
1663static IRAtom* scalarShift ( MCEnv*  mce,
1664                             IRType  ty,
1665                             IROp    original_op,
1666                             IRAtom* qaa, IRAtom* qbb,
1667                             IRAtom* aa,  IRAtom* bb )
1668{
1669   tl_assert(isShadowAtom(mce,qaa));
1670   tl_assert(isShadowAtom(mce,qbb));
1671   tl_assert(isOriginalAtom(mce,aa));
1672   tl_assert(isOriginalAtom(mce,bb));
1673   tl_assert(sameKindedAtoms(qaa,aa));
1674   tl_assert(sameKindedAtoms(qbb,bb));
1675   return
1676      assignNew(
1677         'V', mce, ty,
1678         mkUifU( mce, ty,
1679                 assignNew('V', mce, ty, binop(original_op, qaa, bb)),
1680                 mkPCastTo(mce, ty, qbb)
1681         )
1682   );
1683}
1684
1685
1686/*------------------------------------------------------------*/
1687/*--- Helpers for dealing with vector primops.             ---*/
1688/*------------------------------------------------------------*/
1689
1690/* Vector pessimisation -- pessimise within each lane individually. */
1691
1692static IRAtom* mkPCast8x16 ( MCEnv* mce, IRAtom* at )
1693{
1694   return assignNew('V', mce, Ity_V128, unop(Iop_CmpNEZ8x16, at));
1695}
1696
1697static IRAtom* mkPCast16x8 ( MCEnv* mce, IRAtom* at )
1698{
1699   return assignNew('V', mce, Ity_V128, unop(Iop_CmpNEZ16x8, at));
1700}
1701
1702static IRAtom* mkPCast32x4 ( MCEnv* mce, IRAtom* at )
1703{
1704   return assignNew('V', mce, Ity_V128, unop(Iop_CmpNEZ32x4, at));
1705}
1706
1707static IRAtom* mkPCast64x2 ( MCEnv* mce, IRAtom* at )
1708{
1709   return assignNew('V', mce, Ity_V128, unop(Iop_CmpNEZ64x2, at));
1710}
1711
1712static IRAtom* mkPCast32x2 ( MCEnv* mce, IRAtom* at )
1713{
1714   return assignNew('V', mce, Ity_I64, unop(Iop_CmpNEZ32x2, at));
1715}
1716
1717static IRAtom* mkPCast16x4 ( MCEnv* mce, IRAtom* at )
1718{
1719   return assignNew('V', mce, Ity_I64, unop(Iop_CmpNEZ16x4, at));
1720}
1721
1722static IRAtom* mkPCast8x8 ( MCEnv* mce, IRAtom* at )
1723{
1724   return assignNew('V', mce, Ity_I64, unop(Iop_CmpNEZ8x8, at));
1725}
1726
1727static IRAtom* mkPCast16x2 ( MCEnv* mce, IRAtom* at )
1728{
1729   return assignNew('V', mce, Ity_I32, unop(Iop_CmpNEZ16x2, at));
1730}
1731
1732static IRAtom* mkPCast8x4 ( MCEnv* mce, IRAtom* at )
1733{
1734   return assignNew('V', mce, Ity_I32, unop(Iop_CmpNEZ8x4, at));
1735}
1736
1737
1738/* Here's a simple scheme capable of handling ops derived from SSE1
1739   code and while only generating ops that can be efficiently
1740   implemented in SSE1. */
1741
1742/* All-lanes versions are straightforward:
1743
1744   binary32Fx4(x,y)   ==> PCast32x4(UifUV128(x#,y#))
1745
1746   unary32Fx4(x,y)    ==> PCast32x4(x#)
1747
1748   Lowest-lane-only versions are more complex:
1749
1750   binary32F0x4(x,y)  ==> SetV128lo32(
1751                             x#,
1752                             PCast32(V128to32(UifUV128(x#,y#)))
1753                          )
1754
1755   This is perhaps not so obvious.  In particular, it's faster to
1756   do a V128-bit UifU and then take the bottom 32 bits than the more
1757   obvious scheme of taking the bottom 32 bits of each operand
1758   and doing a 32-bit UifU.  Basically since UifU is fast and
1759   chopping lanes off vector values is slow.
1760
1761   Finally:
1762
1763   unary32F0x4(x)     ==> SetV128lo32(
1764                             x#,
1765                             PCast32(V128to32(x#))
1766                          )
1767
1768   Where:
1769
1770   PCast32(v#)   = 1Sto32(CmpNE32(v#,0))
1771   PCast32x4(v#) = CmpNEZ32x4(v#)
1772*/
1773
1774static
1775IRAtom* binary32Fx4 ( MCEnv* mce, IRAtom* vatomX, IRAtom* vatomY )
1776{
1777   IRAtom* at;
1778   tl_assert(isShadowAtom(mce, vatomX));
1779   tl_assert(isShadowAtom(mce, vatomY));
1780   at = mkUifUV128(mce, vatomX, vatomY);
1781   at = assignNew('V', mce, Ity_V128, mkPCast32x4(mce, at));
1782   return at;
1783}
1784
1785static
1786IRAtom* unary32Fx4 ( MCEnv* mce, IRAtom* vatomX )
1787{
1788   IRAtom* at;
1789   tl_assert(isShadowAtom(mce, vatomX));
1790   at = assignNew('V', mce, Ity_V128, mkPCast32x4(mce, vatomX));
1791   return at;
1792}
1793
1794static
1795IRAtom* binary32F0x4 ( MCEnv* mce, IRAtom* vatomX, IRAtom* vatomY )
1796{
1797   IRAtom* at;
1798   tl_assert(isShadowAtom(mce, vatomX));
1799   tl_assert(isShadowAtom(mce, vatomY));
1800   at = mkUifUV128(mce, vatomX, vatomY);
1801   at = assignNew('V', mce, Ity_I32, unop(Iop_V128to32, at));
1802   at = mkPCastTo(mce, Ity_I32, at);
1803   at = assignNew('V', mce, Ity_V128, binop(Iop_SetV128lo32, vatomX, at));
1804   return at;
1805}
1806
1807static
1808IRAtom* unary32F0x4 ( MCEnv* mce, IRAtom* vatomX )
1809{
1810   IRAtom* at;
1811   tl_assert(isShadowAtom(mce, vatomX));
1812   at = assignNew('V', mce, Ity_I32, unop(Iop_V128to32, vatomX));
1813   at = mkPCastTo(mce, Ity_I32, at);
1814   at = assignNew('V', mce, Ity_V128, binop(Iop_SetV128lo32, vatomX, at));
1815   return at;
1816}
1817
1818/* --- ... and ... 64Fx2 versions of the same ... --- */
1819
1820static
1821IRAtom* binary64Fx2 ( MCEnv* mce, IRAtom* vatomX, IRAtom* vatomY )
1822{
1823   IRAtom* at;
1824   tl_assert(isShadowAtom(mce, vatomX));
1825   tl_assert(isShadowAtom(mce, vatomY));
1826   at = mkUifUV128(mce, vatomX, vatomY);
1827   at = assignNew('V', mce, Ity_V128, mkPCast64x2(mce, at));
1828   return at;
1829}
1830
1831static
1832IRAtom* unary64Fx2 ( MCEnv* mce, IRAtom* vatomX )
1833{
1834   IRAtom* at;
1835   tl_assert(isShadowAtom(mce, vatomX));
1836   at = assignNew('V', mce, Ity_V128, mkPCast64x2(mce, vatomX));
1837   return at;
1838}
1839
1840static
1841IRAtom* binary64F0x2 ( MCEnv* mce, IRAtom* vatomX, IRAtom* vatomY )
1842{
1843   IRAtom* at;
1844   tl_assert(isShadowAtom(mce, vatomX));
1845   tl_assert(isShadowAtom(mce, vatomY));
1846   at = mkUifUV128(mce, vatomX, vatomY);
1847   at = assignNew('V', mce, Ity_I64, unop(Iop_V128to64, at));
1848   at = mkPCastTo(mce, Ity_I64, at);
1849   at = assignNew('V', mce, Ity_V128, binop(Iop_SetV128lo64, vatomX, at));
1850   return at;
1851}
1852
1853static
1854IRAtom* unary64F0x2 ( MCEnv* mce, IRAtom* vatomX )
1855{
1856   IRAtom* at;
1857   tl_assert(isShadowAtom(mce, vatomX));
1858   at = assignNew('V', mce, Ity_I64, unop(Iop_V128to64, vatomX));
1859   at = mkPCastTo(mce, Ity_I64, at);
1860   at = assignNew('V', mce, Ity_V128, binop(Iop_SetV128lo64, vatomX, at));
1861   return at;
1862}
1863
1864/* --- --- ... and ... 32Fx2 versions of the same --- --- */
1865
1866static
1867IRAtom* binary32Fx2 ( MCEnv* mce, IRAtom* vatomX, IRAtom* vatomY )
1868{
1869   IRAtom* at;
1870   tl_assert(isShadowAtom(mce, vatomX));
1871   tl_assert(isShadowAtom(mce, vatomY));
1872   at = mkUifU64(mce, vatomX, vatomY);
1873   at = assignNew('V', mce, Ity_I64, mkPCast32x2(mce, at));
1874   return at;
1875}
1876
1877static
1878IRAtom* unary32Fx2 ( MCEnv* mce, IRAtom* vatomX )
1879{
1880   IRAtom* at;
1881   tl_assert(isShadowAtom(mce, vatomX));
1882   at = assignNew('V', mce, Ity_I64, mkPCast32x2(mce, vatomX));
1883   return at;
1884}
1885
1886/* --- --- Vector saturated narrowing --- --- */
1887
1888/* This is quite subtle.  What to do is simple:
1889
1890   Let the original narrowing op be QNarrowW{S,U}xN.  Produce:
1891
1892      the-narrowing-op( PCastWxN(vatom1), PCastWxN(vatom2))
1893
1894   Why this is right is not so simple.  Consider a lane in the args,
1895   vatom1 or 2, doesn't matter.
1896
1897   After the PCast, that lane is all 0s (defined) or all
1898   1s(undefined).
1899
1900   Both signed and unsigned saturating narrowing of all 0s produces
1901   all 0s, which is what we want.
1902
1903   The all-1s case is more complex.  Unsigned narrowing interprets an
1904   all-1s input as the largest unsigned integer, and so produces all
1905   1s as a result since that is the largest unsigned value at the
1906   smaller width.
1907
1908   Signed narrowing interprets all 1s as -1.  Fortunately, -1 narrows
1909   to -1, so we still wind up with all 1s at the smaller width.
1910
1911   So: In short, pessimise the args, then apply the original narrowing
1912   op.
1913*/
1914static
1915IRAtom* vectorNarrowV128 ( MCEnv* mce, IROp narrow_op,
1916                          IRAtom* vatom1, IRAtom* vatom2)
1917{
1918   IRAtom *at1, *at2, *at3;
1919   IRAtom* (*pcast)( MCEnv*, IRAtom* );
1920   switch (narrow_op) {
1921      case Iop_QNarrow32Sx4: pcast = mkPCast32x4; break;
1922      case Iop_QNarrow32Ux4: pcast = mkPCast32x4; break;
1923      case Iop_QNarrow16Sx8: pcast = mkPCast16x8; break;
1924      case Iop_QNarrow16Ux8: pcast = mkPCast16x8; break;
1925      default: VG_(tool_panic)("vectorNarrowV128");
1926   }
1927   tl_assert(isShadowAtom(mce,vatom1));
1928   tl_assert(isShadowAtom(mce,vatom2));
1929   at1 = assignNew('V', mce, Ity_V128, pcast(mce, vatom1));
1930   at2 = assignNew('V', mce, Ity_V128, pcast(mce, vatom2));
1931   at3 = assignNew('V', mce, Ity_V128, binop(narrow_op, at1, at2));
1932   return at3;
1933}
1934
1935static
1936IRAtom* vectorNarrow64 ( MCEnv* mce, IROp narrow_op,
1937                         IRAtom* vatom1, IRAtom* vatom2)
1938{
1939   IRAtom *at1, *at2, *at3;
1940   IRAtom* (*pcast)( MCEnv*, IRAtom* );
1941   switch (narrow_op) {
1942      case Iop_QNarrow32Sx2: pcast = mkPCast32x2; break;
1943      case Iop_QNarrow16Sx4: pcast = mkPCast16x4; break;
1944      case Iop_QNarrow16Ux4: pcast = mkPCast16x4; break;
1945      default: VG_(tool_panic)("vectorNarrow64");
1946   }
1947   tl_assert(isShadowAtom(mce,vatom1));
1948   tl_assert(isShadowAtom(mce,vatom2));
1949   at1 = assignNew('V', mce, Ity_I64, pcast(mce, vatom1));
1950   at2 = assignNew('V', mce, Ity_I64, pcast(mce, vatom2));
1951   at3 = assignNew('V', mce, Ity_I64, binop(narrow_op, at1, at2));
1952   return at3;
1953}
1954
1955static
1956IRAtom* vectorShortenV128 ( MCEnv* mce, IROp shorten_op,
1957                          IRAtom* vatom1)
1958{
1959   IRAtom *at1, *at2;
1960   IRAtom* (*pcast)( MCEnv*, IRAtom* );
1961   switch (shorten_op) {
1962      case Iop_Shorten16x8: pcast = mkPCast16x8; break;
1963      case Iop_Shorten32x4: pcast = mkPCast32x4; break;
1964      case Iop_Shorten64x2: pcast = mkPCast64x2; break;
1965      case Iop_QShortenS16Sx8: pcast = mkPCast16x8; break;
1966      case Iop_QShortenU16Sx8: pcast = mkPCast16x8; break;
1967      case Iop_QShortenU16Ux8: pcast = mkPCast16x8; break;
1968      case Iop_QShortenS32Sx4: pcast = mkPCast32x4; break;
1969      case Iop_QShortenU32Sx4: pcast = mkPCast32x4; break;
1970      case Iop_QShortenU32Ux4: pcast = mkPCast32x4; break;
1971      case Iop_QShortenS64Sx2: pcast = mkPCast64x2; break;
1972      case Iop_QShortenU64Sx2: pcast = mkPCast64x2; break;
1973      case Iop_QShortenU64Ux2: pcast = mkPCast64x2; break;
1974      default: VG_(tool_panic)("vectorShortenV128");
1975   }
1976   tl_assert(isShadowAtom(mce,vatom1));
1977   at1 = assignNew('V', mce, Ity_V128, pcast(mce, vatom1));
1978   at2 = assignNew('V', mce, Ity_I64, unop(shorten_op, at1));
1979   return at2;
1980}
1981
1982static
1983IRAtom* vectorLongenI64 ( MCEnv* mce, IROp longen_op,
1984                           IRAtom* vatom1)
1985{
1986   IRAtom *at1, *at2;
1987   IRAtom* (*pcast)( MCEnv*, IRAtom* );
1988   switch (longen_op) {
1989      case Iop_Longen8Ux8: pcast = mkPCast16x8; break;
1990      case Iop_Longen8Sx8: pcast = mkPCast16x8; break;
1991      case Iop_Longen16Ux4: pcast = mkPCast32x4; break;
1992      case Iop_Longen16Sx4: pcast = mkPCast32x4; break;
1993      case Iop_Longen32Ux2: pcast = mkPCast64x2; break;
1994      case Iop_Longen32Sx2: pcast = mkPCast64x2; break;
1995      default: VG_(tool_panic)("vectorLongenI64");
1996   }
1997   tl_assert(isShadowAtom(mce,vatom1));
1998   at1 = assignNew('V', mce, Ity_V128, unop(longen_op, vatom1));
1999   at2 = assignNew('V', mce, Ity_V128, pcast(mce, at1));
2000   return at2;
2001}
2002
2003
2004/* --- --- Vector integer arithmetic --- --- */
2005
2006/* Simple ... UifU the args and per-lane pessimise the results. */
2007
2008/* --- V128-bit versions --- */
2009
2010static
2011IRAtom* binary8Ix16 ( MCEnv* mce, IRAtom* vatom1, IRAtom* vatom2 )
2012{
2013   IRAtom* at;
2014   at = mkUifUV128(mce, vatom1, vatom2);
2015   at = mkPCast8x16(mce, at);
2016   return at;
2017}
2018
2019static
2020IRAtom* binary16Ix8 ( MCEnv* mce, IRAtom* vatom1, IRAtom* vatom2 )
2021{
2022   IRAtom* at;
2023   at = mkUifUV128(mce, vatom1, vatom2);
2024   at = mkPCast16x8(mce, at);
2025   return at;
2026}
2027
2028static
2029IRAtom* binary32Ix4 ( MCEnv* mce, IRAtom* vatom1, IRAtom* vatom2 )
2030{
2031   IRAtom* at;
2032   at = mkUifUV128(mce, vatom1, vatom2);
2033   at = mkPCast32x4(mce, at);
2034   return at;
2035}
2036
2037static
2038IRAtom* binary64Ix2 ( MCEnv* mce, IRAtom* vatom1, IRAtom* vatom2 )
2039{
2040   IRAtom* at;
2041   at = mkUifUV128(mce, vatom1, vatom2);
2042   at = mkPCast64x2(mce, at);
2043   return at;
2044}
2045
2046/* --- 64-bit versions --- */
2047
2048static
2049IRAtom* binary8Ix8 ( MCEnv* mce, IRAtom* vatom1, IRAtom* vatom2 )
2050{
2051   IRAtom* at;
2052   at = mkUifU64(mce, vatom1, vatom2);
2053   at = mkPCast8x8(mce, at);
2054   return at;
2055}
2056
2057static
2058IRAtom* binary16Ix4 ( MCEnv* mce, IRAtom* vatom1, IRAtom* vatom2 )
2059{
2060   IRAtom* at;
2061   at = mkUifU64(mce, vatom1, vatom2);
2062   at = mkPCast16x4(mce, at);
2063   return at;
2064}
2065
2066static
2067IRAtom* binary32Ix2 ( MCEnv* mce, IRAtom* vatom1, IRAtom* vatom2 )
2068{
2069   IRAtom* at;
2070   at = mkUifU64(mce, vatom1, vatom2);
2071   at = mkPCast32x2(mce, at);
2072   return at;
2073}
2074
2075static
2076IRAtom* binary64Ix1 ( MCEnv* mce, IRAtom* vatom1, IRAtom* vatom2 )
2077{
2078   IRAtom* at;
2079   at = mkUifU64(mce, vatom1, vatom2);
2080   at = mkPCastTo(mce, Ity_I64, at);
2081   return at;
2082}
2083
2084/* --- 32-bit versions --- */
2085
2086static
2087IRAtom* binary8Ix4 ( MCEnv* mce, IRAtom* vatom1, IRAtom* vatom2 )
2088{
2089   IRAtom* at;
2090   at = mkUifU32(mce, vatom1, vatom2);
2091   at = mkPCast8x4(mce, at);
2092   return at;
2093}
2094
2095static
2096IRAtom* binary16Ix2 ( MCEnv* mce, IRAtom* vatom1, IRAtom* vatom2 )
2097{
2098   IRAtom* at;
2099   at = mkUifU32(mce, vatom1, vatom2);
2100   at = mkPCast16x2(mce, at);
2101   return at;
2102}
2103
2104
2105/*------------------------------------------------------------*/
2106/*--- Generate shadow values from all kinds of IRExprs.    ---*/
2107/*------------------------------------------------------------*/
2108
2109static
2110IRAtom* expr2vbits_Qop ( MCEnv* mce,
2111                         IROp op,
2112                         IRAtom* atom1, IRAtom* atom2,
2113                         IRAtom* atom3, IRAtom* atom4 )
2114{
2115   IRAtom* vatom1 = expr2vbits( mce, atom1 );
2116   IRAtom* vatom2 = expr2vbits( mce, atom2 );
2117   IRAtom* vatom3 = expr2vbits( mce, atom3 );
2118   IRAtom* vatom4 = expr2vbits( mce, atom4 );
2119
2120   tl_assert(isOriginalAtom(mce,atom1));
2121   tl_assert(isOriginalAtom(mce,atom2));
2122   tl_assert(isOriginalAtom(mce,atom3));
2123   tl_assert(isOriginalAtom(mce,atom4));
2124   tl_assert(isShadowAtom(mce,vatom1));
2125   tl_assert(isShadowAtom(mce,vatom2));
2126   tl_assert(isShadowAtom(mce,vatom3));
2127   tl_assert(isShadowAtom(mce,vatom4));
2128   tl_assert(sameKindedAtoms(atom1,vatom1));
2129   tl_assert(sameKindedAtoms(atom2,vatom2));
2130   tl_assert(sameKindedAtoms(atom3,vatom3));
2131   tl_assert(sameKindedAtoms(atom4,vatom4));
2132   switch (op) {
2133      case Iop_MAddF64:
2134      case Iop_MAddF64r32:
2135      case Iop_MSubF64:
2136      case Iop_MSubF64r32:
2137         /* I32(rm) x F64 x F64 x F64 -> F64 */
2138         return mkLazy4(mce, Ity_I64, vatom1, vatom2, vatom3, vatom4);
2139      default:
2140         ppIROp(op);
2141         VG_(tool_panic)("memcheck:expr2vbits_Qop");
2142   }
2143}
2144
2145
2146static
2147IRAtom* expr2vbits_Triop ( MCEnv* mce,
2148                           IROp op,
2149                           IRAtom* atom1, IRAtom* atom2, IRAtom* atom3 )
2150{
2151   IRAtom* vatom1 = expr2vbits( mce, atom1 );
2152   IRAtom* vatom2 = expr2vbits( mce, atom2 );
2153   IRAtom* vatom3 = expr2vbits( mce, atom3 );
2154
2155   tl_assert(isOriginalAtom(mce,atom1));
2156   tl_assert(isOriginalAtom(mce,atom2));
2157   tl_assert(isOriginalAtom(mce,atom3));
2158   tl_assert(isShadowAtom(mce,vatom1));
2159   tl_assert(isShadowAtom(mce,vatom2));
2160   tl_assert(isShadowAtom(mce,vatom3));
2161   tl_assert(sameKindedAtoms(atom1,vatom1));
2162   tl_assert(sameKindedAtoms(atom2,vatom2));
2163   tl_assert(sameKindedAtoms(atom3,vatom3));
2164   switch (op) {
2165      case Iop_AddF64:
2166      case Iop_AddF64r32:
2167      case Iop_SubF64:
2168      case Iop_SubF64r32:
2169      case Iop_MulF64:
2170      case Iop_MulF64r32:
2171      case Iop_DivF64:
2172      case Iop_DivF64r32:
2173      case Iop_ScaleF64:
2174      case Iop_Yl2xF64:
2175      case Iop_Yl2xp1F64:
2176      case Iop_AtanF64:
2177      case Iop_PRemF64:
2178      case Iop_PRem1F64:
2179         /* I32(rm) x F64 x F64 -> F64 */
2180         return mkLazy3(mce, Ity_I64, vatom1, vatom2, vatom3);
2181      case Iop_PRemC3210F64:
2182      case Iop_PRem1C3210F64:
2183         /* I32(rm) x F64 x F64 -> I32 */
2184         return mkLazy3(mce, Ity_I32, vatom1, vatom2, vatom3);
2185      case Iop_AddF32:
2186      case Iop_SubF32:
2187      case Iop_MulF32:
2188      case Iop_DivF32:
2189         /* I32(rm) x F32 x F32 -> I32 */
2190         return mkLazy3(mce, Ity_I32, vatom1, vatom2, vatom3);
2191      case Iop_ExtractV128:
2192         complainIfUndefined(mce, atom3);
2193         return assignNew('V', mce, Ity_V128, triop(op, vatom1, vatom2, atom3));
2194      case Iop_Extract64:
2195         complainIfUndefined(mce, atom3);
2196         return assignNew('V', mce, Ity_I64, triop(op, vatom1, vatom2, atom3));
2197      case Iop_SetElem8x8:
2198      case Iop_SetElem16x4:
2199      case Iop_SetElem32x2:
2200         complainIfUndefined(mce, atom2);
2201         return assignNew('V', mce, Ity_I64, triop(op, vatom1, atom2, vatom3));
2202      default:
2203         ppIROp(op);
2204         VG_(tool_panic)("memcheck:expr2vbits_Triop");
2205   }
2206}
2207
2208
2209static
2210IRAtom* expr2vbits_Binop ( MCEnv* mce,
2211                           IROp op,
2212                           IRAtom* atom1, IRAtom* atom2 )
2213{
2214   IRType  and_or_ty;
2215   IRAtom* (*uifu)    (MCEnv*, IRAtom*, IRAtom*);
2216   IRAtom* (*difd)    (MCEnv*, IRAtom*, IRAtom*);
2217   IRAtom* (*improve) (MCEnv*, IRAtom*, IRAtom*);
2218
2219   IRAtom* vatom1 = expr2vbits( mce, atom1 );
2220   IRAtom* vatom2 = expr2vbits( mce, atom2 );
2221
2222   tl_assert(isOriginalAtom(mce,atom1));
2223   tl_assert(isOriginalAtom(mce,atom2));
2224   tl_assert(isShadowAtom(mce,vatom1));
2225   tl_assert(isShadowAtom(mce,vatom2));
2226   tl_assert(sameKindedAtoms(atom1,vatom1));
2227   tl_assert(sameKindedAtoms(atom2,vatom2));
2228   switch (op) {
2229
2230      /* 32-bit SIMD */
2231
2232      case Iop_Add16x2:
2233      case Iop_HAdd16Ux2:
2234      case Iop_HAdd16Sx2:
2235      case Iop_Sub16x2:
2236      case Iop_HSub16Ux2:
2237      case Iop_HSub16Sx2:
2238      case Iop_QAdd16Sx2:
2239      case Iop_QSub16Sx2:
2240         return binary16Ix2(mce, vatom1, vatom2);
2241
2242      case Iop_Add8x4:
2243      case Iop_HAdd8Ux4:
2244      case Iop_HAdd8Sx4:
2245      case Iop_Sub8x4:
2246      case Iop_HSub8Ux4:
2247      case Iop_HSub8Sx4:
2248      case Iop_QSub8Ux4:
2249      case Iop_QAdd8Ux4:
2250      case Iop_QSub8Sx4:
2251      case Iop_QAdd8Sx4:
2252         return binary8Ix4(mce, vatom1, vatom2);
2253
2254      /* 64-bit SIMD */
2255
2256      case Iop_ShrN8x8:
2257      case Iop_ShrN16x4:
2258      case Iop_ShrN32x2:
2259      case Iop_SarN8x8:
2260      case Iop_SarN16x4:
2261      case Iop_SarN32x2:
2262      case Iop_ShlN16x4:
2263      case Iop_ShlN32x2:
2264      case Iop_ShlN8x8:
2265         /* Same scheme as with all other shifts. */
2266         complainIfUndefined(mce, atom2);
2267         return assignNew('V', mce, Ity_I64, binop(op, vatom1, atom2));
2268
2269      case Iop_QNarrow32Sx2:
2270      case Iop_QNarrow16Sx4:
2271      case Iop_QNarrow16Ux4:
2272         return vectorNarrow64(mce, op, vatom1, vatom2);
2273
2274      case Iop_Min8Ux8:
2275      case Iop_Min8Sx8:
2276      case Iop_Max8Ux8:
2277      case Iop_Max8Sx8:
2278      case Iop_Avg8Ux8:
2279      case Iop_QSub8Sx8:
2280      case Iop_QSub8Ux8:
2281      case Iop_Sub8x8:
2282      case Iop_CmpGT8Sx8:
2283      case Iop_CmpGT8Ux8:
2284      case Iop_CmpEQ8x8:
2285      case Iop_QAdd8Sx8:
2286      case Iop_QAdd8Ux8:
2287      case Iop_QSal8x8:
2288      case Iop_QShl8x8:
2289      case Iop_Add8x8:
2290      case Iop_Mul8x8:
2291      case Iop_PolynomialMul8x8:
2292         return binary8Ix8(mce, vatom1, vatom2);
2293
2294      case Iop_Min16Sx4:
2295      case Iop_Min16Ux4:
2296      case Iop_Max16Sx4:
2297      case Iop_Max16Ux4:
2298      case Iop_Avg16Ux4:
2299      case Iop_QSub16Ux4:
2300      case Iop_QSub16Sx4:
2301      case Iop_Sub16x4:
2302      case Iop_Mul16x4:
2303      case Iop_MulHi16Sx4:
2304      case Iop_MulHi16Ux4:
2305      case Iop_CmpGT16Sx4:
2306      case Iop_CmpGT16Ux4:
2307      case Iop_CmpEQ16x4:
2308      case Iop_QAdd16Sx4:
2309      case Iop_QAdd16Ux4:
2310      case Iop_QSal16x4:
2311      case Iop_QShl16x4:
2312      case Iop_Add16x4:
2313      case Iop_QDMulHi16Sx4:
2314      case Iop_QRDMulHi16Sx4:
2315         return binary16Ix4(mce, vatom1, vatom2);
2316
2317      case Iop_Sub32x2:
2318      case Iop_Mul32x2:
2319      case Iop_Max32Sx2:
2320      case Iop_Max32Ux2:
2321      case Iop_Min32Sx2:
2322      case Iop_Min32Ux2:
2323      case Iop_CmpGT32Sx2:
2324      case Iop_CmpGT32Ux2:
2325      case Iop_CmpEQ32x2:
2326      case Iop_Add32x2:
2327      case Iop_QAdd32Ux2:
2328      case Iop_QAdd32Sx2:
2329      case Iop_QSub32Ux2:
2330      case Iop_QSub32Sx2:
2331      case Iop_QSal32x2:
2332      case Iop_QShl32x2:
2333      case Iop_QDMulHi32Sx2:
2334      case Iop_QRDMulHi32Sx2:
2335         return binary32Ix2(mce, vatom1, vatom2);
2336
2337      case Iop_QSub64Ux1:
2338      case Iop_QSub64Sx1:
2339      case Iop_QAdd64Ux1:
2340      case Iop_QAdd64Sx1:
2341      case Iop_QSal64x1:
2342      case Iop_QShl64x1:
2343      case Iop_Sal64x1:
2344         return binary64Ix1(mce, vatom1, vatom2);
2345
2346      case Iop_QShlN8Sx8:
2347      case Iop_QShlN8x8:
2348      case Iop_QSalN8x8:
2349         complainIfUndefined(mce, atom2);
2350         return mkPCast8x8(mce, vatom1);
2351
2352      case Iop_QShlN16Sx4:
2353      case Iop_QShlN16x4:
2354      case Iop_QSalN16x4:
2355         complainIfUndefined(mce, atom2);
2356         return mkPCast16x4(mce, vatom1);
2357
2358      case Iop_QShlN32Sx2:
2359      case Iop_QShlN32x2:
2360      case Iop_QSalN32x2:
2361         complainIfUndefined(mce, atom2);
2362         return mkPCast32x2(mce, vatom1);
2363
2364      case Iop_QShlN64Sx1:
2365      case Iop_QShlN64x1:
2366      case Iop_QSalN64x1:
2367         complainIfUndefined(mce, atom2);
2368         return mkPCast32x2(mce, vatom1);
2369
2370      case Iop_PwMax32Sx2:
2371      case Iop_PwMax32Ux2:
2372      case Iop_PwMin32Sx2:
2373      case Iop_PwMin32Ux2:
2374      case Iop_PwMax32Fx2:
2375      case Iop_PwMin32Fx2:
2376         return assignNew('V', mce, Ity_I64, binop(Iop_PwMax32Ux2, mkPCast32x2(mce, vatom1),
2377                     mkPCast32x2(mce, vatom2)));
2378
2379      case Iop_PwMax16Sx4:
2380      case Iop_PwMax16Ux4:
2381      case Iop_PwMin16Sx4:
2382      case Iop_PwMin16Ux4:
2383         return assignNew('V', mce, Ity_I64, binop(Iop_PwMax16Ux4, mkPCast16x4(mce, vatom1),
2384                     mkPCast16x4(mce, vatom2)));
2385
2386      case Iop_PwMax8Sx8:
2387      case Iop_PwMax8Ux8:
2388      case Iop_PwMin8Sx8:
2389      case Iop_PwMin8Ux8:
2390         return assignNew('V', mce, Ity_I64, binop(Iop_PwMax8Ux8, mkPCast8x8(mce, vatom1),
2391                     mkPCast8x8(mce, vatom2)));
2392
2393      case Iop_PwAdd32x2:
2394      case Iop_PwAdd32Fx2:
2395         return mkPCast32x2(mce,
2396               assignNew('V', mce, Ity_I64, binop(Iop_PwAdd32x2, mkPCast32x2(mce, vatom1),
2397                     mkPCast32x2(mce, vatom2))));
2398
2399      case Iop_PwAdd16x4:
2400         return mkPCast16x4(mce,
2401               assignNew('V', mce, Ity_I64, binop(op, mkPCast16x4(mce, vatom1),
2402                     mkPCast16x4(mce, vatom2))));
2403
2404      case Iop_PwAdd8x8:
2405         return mkPCast8x8(mce,
2406               assignNew('V', mce, Ity_I64, binop(op, mkPCast8x8(mce, vatom1),
2407                     mkPCast8x8(mce, vatom2))));
2408
2409      case Iop_Shl8x8:
2410      case Iop_Shr8x8:
2411      case Iop_Sar8x8:
2412      case Iop_Sal8x8:
2413         return mkUifU64(mce,
2414                   assignNew('V', mce, Ity_I64, binop(op, vatom1, atom2)),
2415                   mkPCast8x8(mce,vatom2)
2416                );
2417
2418      case Iop_Shl16x4:
2419      case Iop_Shr16x4:
2420      case Iop_Sar16x4:
2421      case Iop_Sal16x4:
2422         return mkUifU64(mce,
2423                   assignNew('V', mce, Ity_I64, binop(op, vatom1, atom2)),
2424                   mkPCast16x4(mce,vatom2)
2425                );
2426
2427      case Iop_Shl32x2:
2428      case Iop_Shr32x2:
2429      case Iop_Sar32x2:
2430      case Iop_Sal32x2:
2431         return mkUifU64(mce,
2432                   assignNew('V', mce, Ity_I64, binop(op, vatom1, atom2)),
2433                   mkPCast32x2(mce,vatom2)
2434                );
2435
2436      /* 64-bit data-steering */
2437      case Iop_InterleaveLO32x2:
2438      case Iop_InterleaveLO16x4:
2439      case Iop_InterleaveLO8x8:
2440      case Iop_InterleaveHI32x2:
2441      case Iop_InterleaveHI16x4:
2442      case Iop_InterleaveHI8x8:
2443      case Iop_CatOddLanes8x8:
2444      case Iop_CatEvenLanes8x8:
2445      case Iop_CatOddLanes16x4:
2446      case Iop_CatEvenLanes16x4:
2447      case Iop_InterleaveOddLanes8x8:
2448      case Iop_InterleaveEvenLanes8x8:
2449      case Iop_InterleaveOddLanes16x4:
2450      case Iop_InterleaveEvenLanes16x4:
2451         return assignNew('V', mce, Ity_I64, binop(op, vatom1, vatom2));
2452
2453      case Iop_GetElem8x8:
2454         complainIfUndefined(mce, atom2);
2455         return assignNew('V', mce, Ity_I8, binop(op, vatom1, atom2));
2456      case Iop_GetElem16x4:
2457         complainIfUndefined(mce, atom2);
2458         return assignNew('V', mce, Ity_I16, binop(op, vatom1, atom2));
2459      case Iop_GetElem32x2:
2460         complainIfUndefined(mce, atom2);
2461         return assignNew('V', mce, Ity_I32, binop(op, vatom1, atom2));
2462
2463      /* Perm8x8: rearrange values in left arg using steering values
2464        from right arg.  So rearrange the vbits in the same way but
2465        pessimise wrt steering values. */
2466      case Iop_Perm8x8:
2467         return mkUifU64(
2468                   mce,
2469                   assignNew('V', mce, Ity_I64, binop(op, vatom1, atom2)),
2470                   mkPCast8x8(mce, vatom2)
2471                );
2472
2473      /* V128-bit SIMD */
2474
2475      case Iop_ShrN8x16:
2476      case Iop_ShrN16x8:
2477      case Iop_ShrN32x4:
2478      case Iop_ShrN64x2:
2479      case Iop_SarN8x16:
2480      case Iop_SarN16x8:
2481      case Iop_SarN32x4:
2482      case Iop_SarN64x2:
2483      case Iop_ShlN8x16:
2484      case Iop_ShlN16x8:
2485      case Iop_ShlN32x4:
2486      case Iop_ShlN64x2:
2487         /* Same scheme as with all other shifts.  Note: 22 Oct 05:
2488            this is wrong now, scalar shifts are done properly lazily.
2489            Vector shifts should be fixed too. */
2490         complainIfUndefined(mce, atom2);
2491         return assignNew('V', mce, Ity_V128, binop(op, vatom1, atom2));
2492
2493      /* V x V shifts/rotates are done using the standard lazy scheme. */
2494      case Iop_Shl8x16:
2495      case Iop_Shr8x16:
2496      case Iop_Sar8x16:
2497      case Iop_Sal8x16:
2498      case Iop_Rol8x16:
2499         return mkUifUV128(mce,
2500                   assignNew('V', mce, Ity_V128, binop(op, vatom1, atom2)),
2501                   mkPCast8x16(mce,vatom2)
2502                );
2503
2504      case Iop_Shl16x8:
2505      case Iop_Shr16x8:
2506      case Iop_Sar16x8:
2507      case Iop_Sal16x8:
2508      case Iop_Rol16x8:
2509         return mkUifUV128(mce,
2510                   assignNew('V', mce, Ity_V128, binop(op, vatom1, atom2)),
2511                   mkPCast16x8(mce,vatom2)
2512                );
2513
2514      case Iop_Shl32x4:
2515      case Iop_Shr32x4:
2516      case Iop_Sar32x4:
2517      case Iop_Sal32x4:
2518      case Iop_Rol32x4:
2519         return mkUifUV128(mce,
2520                   assignNew('V', mce, Ity_V128, binop(op, vatom1, atom2)),
2521                   mkPCast32x4(mce,vatom2)
2522                );
2523
2524      case Iop_Shl64x2:
2525      case Iop_Shr64x2:
2526      case Iop_Sar64x2:
2527      case Iop_Sal64x2:
2528         return mkUifUV128(mce,
2529                   assignNew('V', mce, Ity_V128, binop(op, vatom1, atom2)),
2530                   mkPCast64x2(mce,vatom2)
2531                );
2532
2533      case Iop_F32ToFixed32Ux4_RZ:
2534      case Iop_F32ToFixed32Sx4_RZ:
2535      case Iop_Fixed32UToF32x4_RN:
2536      case Iop_Fixed32SToF32x4_RN:
2537         complainIfUndefined(mce, atom2);
2538         return mkPCast32x4(mce, vatom1);
2539
2540      case Iop_F32ToFixed32Ux2_RZ:
2541      case Iop_F32ToFixed32Sx2_RZ:
2542      case Iop_Fixed32UToF32x2_RN:
2543      case Iop_Fixed32SToF32x2_RN:
2544         complainIfUndefined(mce, atom2);
2545         return mkPCast32x2(mce, vatom1);
2546
2547      case Iop_QSub8Ux16:
2548      case Iop_QSub8Sx16:
2549      case Iop_Sub8x16:
2550      case Iop_Min8Ux16:
2551      case Iop_Min8Sx16:
2552      case Iop_Max8Ux16:
2553      case Iop_Max8Sx16:
2554      case Iop_CmpGT8Sx16:
2555      case Iop_CmpGT8Ux16:
2556      case Iop_CmpEQ8x16:
2557      case Iop_Avg8Ux16:
2558      case Iop_Avg8Sx16:
2559      case Iop_QAdd8Ux16:
2560      case Iop_QAdd8Sx16:
2561      case Iop_QSal8x16:
2562      case Iop_QShl8x16:
2563      case Iop_Add8x16:
2564      case Iop_Mul8x16:
2565      case Iop_PolynomialMul8x16:
2566         return binary8Ix16(mce, vatom1, vatom2);
2567
2568      case Iop_QSub16Ux8:
2569      case Iop_QSub16Sx8:
2570      case Iop_Sub16x8:
2571      case Iop_Mul16x8:
2572      case Iop_MulHi16Sx8:
2573      case Iop_MulHi16Ux8:
2574      case Iop_Min16Sx8:
2575      case Iop_Min16Ux8:
2576      case Iop_Max16Sx8:
2577      case Iop_Max16Ux8:
2578      case Iop_CmpGT16Sx8:
2579      case Iop_CmpGT16Ux8:
2580      case Iop_CmpEQ16x8:
2581      case Iop_Avg16Ux8:
2582      case Iop_Avg16Sx8:
2583      case Iop_QAdd16Ux8:
2584      case Iop_QAdd16Sx8:
2585      case Iop_QSal16x8:
2586      case Iop_QShl16x8:
2587      case Iop_Add16x8:
2588      case Iop_QDMulHi16Sx8:
2589      case Iop_QRDMulHi16Sx8:
2590         return binary16Ix8(mce, vatom1, vatom2);
2591
2592      case Iop_Sub32x4:
2593      case Iop_CmpGT32Sx4:
2594      case Iop_CmpGT32Ux4:
2595      case Iop_CmpEQ32x4:
2596      case Iop_QAdd32Sx4:
2597      case Iop_QAdd32Ux4:
2598      case Iop_QSub32Sx4:
2599      case Iop_QSub32Ux4:
2600      case Iop_QSal32x4:
2601      case Iop_QShl32x4:
2602      case Iop_Avg32Ux4:
2603      case Iop_Avg32Sx4:
2604      case Iop_Add32x4:
2605      case Iop_Max32Ux4:
2606      case Iop_Max32Sx4:
2607      case Iop_Min32Ux4:
2608      case Iop_Min32Sx4:
2609      case Iop_Mul32x4:
2610      case Iop_QDMulHi32Sx4:
2611      case Iop_QRDMulHi32Sx4:
2612         return binary32Ix4(mce, vatom1, vatom2);
2613
2614      case Iop_Sub64x2:
2615      case Iop_Add64x2:
2616      case Iop_CmpGT64Sx2:
2617      case Iop_QSal64x2:
2618      case Iop_QShl64x2:
2619      case Iop_QAdd64Ux2:
2620      case Iop_QAdd64Sx2:
2621      case Iop_QSub64Ux2:
2622      case Iop_QSub64Sx2:
2623         return binary64Ix2(mce, vatom1, vatom2);
2624
2625      case Iop_QNarrow32Sx4:
2626      case Iop_QNarrow32Ux4:
2627      case Iop_QNarrow16Sx8:
2628      case Iop_QNarrow16Ux8:
2629         return vectorNarrowV128(mce, op, vatom1, vatom2);
2630
2631      case Iop_Sub64Fx2:
2632      case Iop_Mul64Fx2:
2633      case Iop_Min64Fx2:
2634      case Iop_Max64Fx2:
2635      case Iop_Div64Fx2:
2636      case Iop_CmpLT64Fx2:
2637      case Iop_CmpLE64Fx2:
2638      case Iop_CmpEQ64Fx2:
2639      case Iop_CmpUN64Fx2:
2640      case Iop_Add64Fx2:
2641         return binary64Fx2(mce, vatom1, vatom2);
2642
2643      case Iop_Sub64F0x2:
2644      case Iop_Mul64F0x2:
2645      case Iop_Min64F0x2:
2646      case Iop_Max64F0x2:
2647      case Iop_Div64F0x2:
2648      case Iop_CmpLT64F0x2:
2649      case Iop_CmpLE64F0x2:
2650      case Iop_CmpEQ64F0x2:
2651      case Iop_CmpUN64F0x2:
2652      case Iop_Add64F0x2:
2653         return binary64F0x2(mce, vatom1, vatom2);
2654
2655      case Iop_Sub32Fx4:
2656      case Iop_Mul32Fx4:
2657      case Iop_Min32Fx4:
2658      case Iop_Max32Fx4:
2659      case Iop_Div32Fx4:
2660      case Iop_CmpLT32Fx4:
2661      case Iop_CmpLE32Fx4:
2662      case Iop_CmpEQ32Fx4:
2663      case Iop_CmpUN32Fx4:
2664      case Iop_CmpGT32Fx4:
2665      case Iop_CmpGE32Fx4:
2666      case Iop_Add32Fx4:
2667      case Iop_Recps32Fx4:
2668      case Iop_Rsqrts32Fx4:
2669         return binary32Fx4(mce, vatom1, vatom2);
2670
2671      case Iop_Sub32Fx2:
2672      case Iop_Mul32Fx2:
2673      case Iop_Min32Fx2:
2674      case Iop_Max32Fx2:
2675      case Iop_CmpEQ32Fx2:
2676      case Iop_CmpGT32Fx2:
2677      case Iop_CmpGE32Fx2:
2678      case Iop_Add32Fx2:
2679      case Iop_Recps32Fx2:
2680      case Iop_Rsqrts32Fx2:
2681         return binary32Fx2(mce, vatom1, vatom2);
2682
2683      case Iop_Sub32F0x4:
2684      case Iop_Mul32F0x4:
2685      case Iop_Min32F0x4:
2686      case Iop_Max32F0x4:
2687      case Iop_Div32F0x4:
2688      case Iop_CmpLT32F0x4:
2689      case Iop_CmpLE32F0x4:
2690      case Iop_CmpEQ32F0x4:
2691      case Iop_CmpUN32F0x4:
2692      case Iop_Add32F0x4:
2693         return binary32F0x4(mce, vatom1, vatom2);
2694
2695      case Iop_QShlN8Sx16:
2696      case Iop_QShlN8x16:
2697      case Iop_QSalN8x16:
2698         complainIfUndefined(mce, atom2);
2699         return mkPCast8x16(mce, vatom1);
2700
2701      case Iop_QShlN16Sx8:
2702      case Iop_QShlN16x8:
2703      case Iop_QSalN16x8:
2704         complainIfUndefined(mce, atom2);
2705         return mkPCast16x8(mce, vatom1);
2706
2707      case Iop_QShlN32Sx4:
2708      case Iop_QShlN32x4:
2709      case Iop_QSalN32x4:
2710         complainIfUndefined(mce, atom2);
2711         return mkPCast32x4(mce, vatom1);
2712
2713      case Iop_QShlN64Sx2:
2714      case Iop_QShlN64x2:
2715      case Iop_QSalN64x2:
2716         complainIfUndefined(mce, atom2);
2717         return mkPCast32x4(mce, vatom1);
2718
2719      case Iop_Mull32Sx2:
2720      case Iop_Mull32Ux2:
2721      case Iop_QDMulLong32Sx2:
2722         return vectorLongenI64(mce, Iop_Longen32Sx2,
2723               mkUifU64(mce, vatom1, vatom2));
2724
2725      case Iop_Mull16Sx4:
2726      case Iop_Mull16Ux4:
2727      case Iop_QDMulLong16Sx4:
2728         return vectorLongenI64(mce, Iop_Longen16Sx4,
2729               mkUifU64(mce, vatom1, vatom2));
2730
2731      case Iop_Mull8Sx8:
2732      case Iop_Mull8Ux8:
2733      case Iop_PolynomialMull8x8:
2734         return vectorLongenI64(mce, Iop_Longen8Sx8,
2735               mkUifU64(mce, vatom1, vatom2));
2736
2737      case Iop_PwAdd32x4:
2738         return mkPCast32x4(mce,
2739               assignNew('V', mce, Ity_V128, binop(op, mkPCast32x4(mce, vatom1),
2740                     mkPCast32x4(mce, vatom2))));
2741
2742      case Iop_PwAdd16x8:
2743         return mkPCast16x8(mce,
2744               assignNew('V', mce, Ity_V128, binop(op, mkPCast16x8(mce, vatom1),
2745                     mkPCast16x8(mce, vatom2))));
2746
2747      case Iop_PwAdd8x16:
2748         return mkPCast8x16(mce,
2749               assignNew('V', mce, Ity_V128, binop(op, mkPCast8x16(mce, vatom1),
2750                     mkPCast8x16(mce, vatom2))));
2751
2752      /* V128-bit data-steering */
2753      case Iop_SetV128lo32:
2754      case Iop_SetV128lo64:
2755      case Iop_64HLtoV128:
2756      case Iop_InterleaveLO64x2:
2757      case Iop_InterleaveLO32x4:
2758      case Iop_InterleaveLO16x8:
2759      case Iop_InterleaveLO8x16:
2760      case Iop_InterleaveHI64x2:
2761      case Iop_InterleaveHI32x4:
2762      case Iop_InterleaveHI16x8:
2763      case Iop_InterleaveHI8x16:
2764      case Iop_CatOddLanes8x16:
2765      case Iop_CatOddLanes16x8:
2766      case Iop_CatOddLanes32x4:
2767      case Iop_CatEvenLanes8x16:
2768      case Iop_CatEvenLanes16x8:
2769      case Iop_CatEvenLanes32x4:
2770      case Iop_InterleaveOddLanes8x16:
2771      case Iop_InterleaveOddLanes16x8:
2772      case Iop_InterleaveOddLanes32x4:
2773      case Iop_InterleaveEvenLanes8x16:
2774      case Iop_InterleaveEvenLanes16x8:
2775      case Iop_InterleaveEvenLanes32x4:
2776         return assignNew('V', mce, Ity_V128, binop(op, vatom1, vatom2));
2777
2778      case Iop_GetElem8x16:
2779         complainIfUndefined(mce, atom2);
2780         return assignNew('V', mce, Ity_I8, binop(op, vatom1, atom2));
2781      case Iop_GetElem16x8:
2782         complainIfUndefined(mce, atom2);
2783         return assignNew('V', mce, Ity_I16, binop(op, vatom1, atom2));
2784      case Iop_GetElem32x4:
2785         complainIfUndefined(mce, atom2);
2786         return assignNew('V', mce, Ity_I32, binop(op, vatom1, atom2));
2787      case Iop_GetElem64x2:
2788         complainIfUndefined(mce, atom2);
2789         return assignNew('V', mce, Ity_I64, binop(op, vatom1, atom2));
2790
2791     /* Perm8x16: rearrange values in left arg using steering values
2792        from right arg.  So rearrange the vbits in the same way but
2793        pessimise wrt steering values. */
2794      case Iop_Perm8x16:
2795         return mkUifUV128(
2796                   mce,
2797                   assignNew('V', mce, Ity_V128, binop(op, vatom1, atom2)),
2798                   mkPCast8x16(mce, vatom2)
2799                );
2800
2801     /* These two take the lower half of each 16-bit lane, sign/zero
2802        extend it to 32, and multiply together, producing a 32x4
2803        result (and implicitly ignoring half the operand bits).  So
2804        treat it as a bunch of independent 16x8 operations, but then
2805        do 32-bit shifts left-right to copy the lower half results
2806        (which are all 0s or all 1s due to PCasting in binary16Ix8)
2807        into the upper half of each result lane. */
2808      case Iop_MullEven16Ux8:
2809      case Iop_MullEven16Sx8: {
2810         IRAtom* at;
2811         at = binary16Ix8(mce,vatom1,vatom2);
2812         at = assignNew('V', mce, Ity_V128, binop(Iop_ShlN32x4, at, mkU8(16)));
2813         at = assignNew('V', mce, Ity_V128, binop(Iop_SarN32x4, at, mkU8(16)));
2814	 return at;
2815      }
2816
2817      /* Same deal as Iop_MullEven16{S,U}x8 */
2818      case Iop_MullEven8Ux16:
2819      case Iop_MullEven8Sx16: {
2820         IRAtom* at;
2821         at = binary8Ix16(mce,vatom1,vatom2);
2822         at = assignNew('V', mce, Ity_V128, binop(Iop_ShlN16x8, at, mkU8(8)));
2823         at = assignNew('V', mce, Ity_V128, binop(Iop_SarN16x8, at, mkU8(8)));
2824	 return at;
2825      }
2826
2827      /* narrow 2xV128 into 1xV128, hi half from left arg, in a 2 x
2828         32x4 -> 16x8 laneage, discarding the upper half of each lane.
2829         Simply apply same op to the V bits, since this really no more
2830         than a data steering operation. */
2831      case Iop_Narrow32x4:
2832      case Iop_Narrow16x8:
2833         return assignNew('V', mce, Ity_V128,
2834                                    binop(op, vatom1, vatom2));
2835
2836      case Iop_ShrV128:
2837      case Iop_ShlV128:
2838         /* Same scheme as with all other shifts.  Note: 10 Nov 05:
2839            this is wrong now, scalar shifts are done properly lazily.
2840            Vector shifts should be fixed too. */
2841         complainIfUndefined(mce, atom2);
2842         return assignNew('V', mce, Ity_V128, binop(op, vatom1, atom2));
2843
2844      /* I128-bit data-steering */
2845      case Iop_64HLto128:
2846         return assignNew('V', mce, Ity_I128, binop(op, vatom1, vatom2));
2847
2848      /* Scalar floating point */
2849
2850      case Iop_RoundF64toInt:
2851      case Iop_RoundF64toF32:
2852      case Iop_F64toI64S:
2853      case Iop_I64StoF64:
2854      case Iop_SinF64:
2855      case Iop_CosF64:
2856      case Iop_TanF64:
2857      case Iop_2xm1F64:
2858      case Iop_SqrtF64:
2859         /* I32(rm) x I64/F64 -> I64/F64 */
2860         return mkLazy2(mce, Ity_I64, vatom1, vatom2);
2861
2862      case Iop_RoundF32toInt:
2863      case Iop_SqrtF32:
2864         /* I32(rm) x I32/F32 -> I32/F32 */
2865         return mkLazy2(mce, Ity_I32, vatom1, vatom2);
2866
2867      case Iop_F64toI32U:
2868      case Iop_F64toI32S:
2869      case Iop_F64toF32:
2870         /* First arg is I32 (rounding mode), second is F64 (data). */
2871         return mkLazy2(mce, Ity_I32, vatom1, vatom2);
2872
2873      case Iop_F64toI16S:
2874         /* First arg is I32 (rounding mode), second is F64 (data). */
2875         return mkLazy2(mce, Ity_I16, vatom1, vatom2);
2876
2877      case Iop_CmpF64:
2878         return mkLazy2(mce, Ity_I32, vatom1, vatom2);
2879
2880      /* non-FP after here */
2881
2882      case Iop_DivModU64to32:
2883      case Iop_DivModS64to32:
2884         return mkLazy2(mce, Ity_I64, vatom1, vatom2);
2885
2886      case Iop_DivModU128to64:
2887      case Iop_DivModS128to64:
2888         return mkLazy2(mce, Ity_I128, vatom1, vatom2);
2889
2890      case Iop_16HLto32:
2891         return assignNew('V', mce, Ity_I32, binop(op, vatom1, vatom2));
2892      case Iop_32HLto64:
2893         return assignNew('V', mce, Ity_I64, binop(op, vatom1, vatom2));
2894
2895      case Iop_MullS64:
2896      case Iop_MullU64: {
2897         IRAtom* vLo64 = mkLeft64(mce, mkUifU64(mce, vatom1,vatom2));
2898         IRAtom* vHi64 = mkPCastTo(mce, Ity_I64, vLo64);
2899         return assignNew('V', mce, Ity_I128, binop(Iop_64HLto128, vHi64, vLo64));
2900      }
2901
2902      case Iop_MullS32:
2903      case Iop_MullU32: {
2904         IRAtom* vLo32 = mkLeft32(mce, mkUifU32(mce, vatom1,vatom2));
2905         IRAtom* vHi32 = mkPCastTo(mce, Ity_I32, vLo32);
2906         return assignNew('V', mce, Ity_I64, binop(Iop_32HLto64, vHi32, vLo32));
2907      }
2908
2909      case Iop_MullS16:
2910      case Iop_MullU16: {
2911         IRAtom* vLo16 = mkLeft16(mce, mkUifU16(mce, vatom1,vatom2));
2912         IRAtom* vHi16 = mkPCastTo(mce, Ity_I16, vLo16);
2913         return assignNew('V', mce, Ity_I32, binop(Iop_16HLto32, vHi16, vLo16));
2914      }
2915
2916      case Iop_MullS8:
2917      case Iop_MullU8: {
2918         IRAtom* vLo8 = mkLeft8(mce, mkUifU8(mce, vatom1,vatom2));
2919         IRAtom* vHi8 = mkPCastTo(mce, Ity_I8, vLo8);
2920         return assignNew('V', mce, Ity_I16, binop(Iop_8HLto16, vHi8, vLo8));
2921      }
2922
2923      case Iop_Sad8Ux4: /* maybe we could do better?  ftm, do mkLazy2. */
2924      case Iop_DivS32:
2925      case Iop_DivU32:
2926         return mkLazy2(mce, Ity_I32, vatom1, vatom2);
2927
2928      case Iop_DivS64:
2929      case Iop_DivU64:
2930         return mkLazy2(mce, Ity_I64, vatom1, vatom2);
2931
2932      case Iop_Add32:
2933         if (mce->bogusLiterals)
2934            return expensiveAddSub(mce,True,Ity_I32,
2935                                   vatom1,vatom2, atom1,atom2);
2936         else
2937            goto cheap_AddSub32;
2938      case Iop_Sub32:
2939         if (mce->bogusLiterals)
2940            return expensiveAddSub(mce,False,Ity_I32,
2941                                   vatom1,vatom2, atom1,atom2);
2942         else
2943            goto cheap_AddSub32;
2944
2945      cheap_AddSub32:
2946      case Iop_Mul32:
2947         return mkLeft32(mce, mkUifU32(mce, vatom1,vatom2));
2948
2949      case Iop_CmpORD32S:
2950      case Iop_CmpORD32U:
2951      case Iop_CmpORD64S:
2952      case Iop_CmpORD64U:
2953         return doCmpORD(mce, op, vatom1,vatom2, atom1,atom2);
2954
2955      case Iop_Add64:
2956         if (mce->bogusLiterals)
2957            return expensiveAddSub(mce,True,Ity_I64,
2958                                   vatom1,vatom2, atom1,atom2);
2959         else
2960            goto cheap_AddSub64;
2961      case Iop_Sub64:
2962         if (mce->bogusLiterals)
2963            return expensiveAddSub(mce,False,Ity_I64,
2964                                   vatom1,vatom2, atom1,atom2);
2965         else
2966            goto cheap_AddSub64;
2967
2968      cheap_AddSub64:
2969      case Iop_Mul64:
2970         return mkLeft64(mce, mkUifU64(mce, vatom1,vatom2));
2971
2972      case Iop_Mul16:
2973      case Iop_Add16:
2974      case Iop_Sub16:
2975         return mkLeft16(mce, mkUifU16(mce, vatom1,vatom2));
2976
2977      case Iop_Sub8:
2978      case Iop_Add8:
2979         return mkLeft8(mce, mkUifU8(mce, vatom1,vatom2));
2980
2981      case Iop_CmpEQ64:
2982      case Iop_CmpNE64:
2983         if (mce->bogusLiterals)
2984            return expensiveCmpEQorNE(mce,Ity_I64, vatom1,vatom2, atom1,atom2 );
2985         else
2986            goto cheap_cmp64;
2987      cheap_cmp64:
2988      case Iop_CmpLE64S: case Iop_CmpLE64U:
2989      case Iop_CmpLT64U: case Iop_CmpLT64S:
2990         return mkPCastTo(mce, Ity_I1, mkUifU64(mce, vatom1,vatom2));
2991
2992      case Iop_CmpEQ32:
2993      case Iop_CmpNE32:
2994         if (mce->bogusLiterals)
2995            return expensiveCmpEQorNE(mce,Ity_I32, vatom1,vatom2, atom1,atom2 );
2996         else
2997            goto cheap_cmp32;
2998      cheap_cmp32:
2999      case Iop_CmpLE32S: case Iop_CmpLE32U:
3000      case Iop_CmpLT32U: case Iop_CmpLT32S:
3001         return mkPCastTo(mce, Ity_I1, mkUifU32(mce, vatom1,vatom2));
3002
3003      case Iop_CmpEQ16: case Iop_CmpNE16:
3004         return mkPCastTo(mce, Ity_I1, mkUifU16(mce, vatom1,vatom2));
3005
3006      case Iop_CmpEQ8: case Iop_CmpNE8:
3007         return mkPCastTo(mce, Ity_I1, mkUifU8(mce, vatom1,vatom2));
3008
3009      case Iop_CasCmpEQ8:  case Iop_CasCmpNE8:
3010      case Iop_CasCmpEQ16: case Iop_CasCmpNE16:
3011      case Iop_CasCmpEQ32: case Iop_CasCmpNE32:
3012      case Iop_CasCmpEQ64: case Iop_CasCmpNE64:
3013         /* Just say these all produce a defined result, regardless
3014            of their arguments.  See COMMENT_ON_CasCmpEQ in this file. */
3015         return assignNew('V', mce, Ity_I1, definedOfType(Ity_I1));
3016
3017      case Iop_Shl64: case Iop_Shr64: case Iop_Sar64:
3018         return scalarShift( mce, Ity_I64, op, vatom1,vatom2, atom1,atom2 );
3019
3020      case Iop_Shl32: case Iop_Shr32: case Iop_Sar32:
3021         return scalarShift( mce, Ity_I32, op, vatom1,vatom2, atom1,atom2 );
3022
3023      case Iop_Shl16: case Iop_Shr16: case Iop_Sar16:
3024         return scalarShift( mce, Ity_I16, op, vatom1,vatom2, atom1,atom2 );
3025
3026      case Iop_Shl8: case Iop_Shr8:
3027         return scalarShift( mce, Ity_I8, op, vatom1,vatom2, atom1,atom2 );
3028
3029      case Iop_AndV128:
3030         uifu = mkUifUV128; difd = mkDifDV128;
3031         and_or_ty = Ity_V128; improve = mkImproveANDV128; goto do_And_Or;
3032      case Iop_And64:
3033         uifu = mkUifU64; difd = mkDifD64;
3034         and_or_ty = Ity_I64; improve = mkImproveAND64; goto do_And_Or;
3035      case Iop_And32:
3036         uifu = mkUifU32; difd = mkDifD32;
3037         and_or_ty = Ity_I32; improve = mkImproveAND32; goto do_And_Or;
3038      case Iop_And16:
3039         uifu = mkUifU16; difd = mkDifD16;
3040         and_or_ty = Ity_I16; improve = mkImproveAND16; goto do_And_Or;
3041      case Iop_And8:
3042         uifu = mkUifU8; difd = mkDifD8;
3043         and_or_ty = Ity_I8; improve = mkImproveAND8; goto do_And_Or;
3044
3045      case Iop_OrV128:
3046         uifu = mkUifUV128; difd = mkDifDV128;
3047         and_or_ty = Ity_V128; improve = mkImproveORV128; goto do_And_Or;
3048      case Iop_Or64:
3049         uifu = mkUifU64; difd = mkDifD64;
3050         and_or_ty = Ity_I64; improve = mkImproveOR64; goto do_And_Or;
3051      case Iop_Or32:
3052         uifu = mkUifU32; difd = mkDifD32;
3053         and_or_ty = Ity_I32; improve = mkImproveOR32; goto do_And_Or;
3054      case Iop_Or16:
3055         uifu = mkUifU16; difd = mkDifD16;
3056         and_or_ty = Ity_I16; improve = mkImproveOR16; goto do_And_Or;
3057      case Iop_Or8:
3058         uifu = mkUifU8; difd = mkDifD8;
3059         and_or_ty = Ity_I8; improve = mkImproveOR8; goto do_And_Or;
3060
3061      do_And_Or:
3062         return
3063         assignNew(
3064            'V', mce,
3065            and_or_ty,
3066            difd(mce, uifu(mce, vatom1, vatom2),
3067                      difd(mce, improve(mce, atom1, vatom1),
3068                                improve(mce, atom2, vatom2) ) ) );
3069
3070      case Iop_Xor8:
3071         return mkUifU8(mce, vatom1, vatom2);
3072      case Iop_Xor16:
3073         return mkUifU16(mce, vatom1, vatom2);
3074      case Iop_Xor32:
3075         return mkUifU32(mce, vatom1, vatom2);
3076      case Iop_Xor64:
3077         return mkUifU64(mce, vatom1, vatom2);
3078      case Iop_XorV128:
3079         return mkUifUV128(mce, vatom1, vatom2);
3080
3081      default:
3082         ppIROp(op);
3083         VG_(tool_panic)("memcheck:expr2vbits_Binop");
3084   }
3085}
3086
3087
3088static
3089IRExpr* expr2vbits_Unop ( MCEnv* mce, IROp op, IRAtom* atom )
3090{
3091   IRAtom* vatom = expr2vbits( mce, atom );
3092   tl_assert(isOriginalAtom(mce,atom));
3093   switch (op) {
3094
3095      case Iop_Sqrt64Fx2:
3096         return unary64Fx2(mce, vatom);
3097
3098      case Iop_Sqrt64F0x2:
3099         return unary64F0x2(mce, vatom);
3100
3101      case Iop_Sqrt32Fx4:
3102      case Iop_RSqrt32Fx4:
3103      case Iop_Recip32Fx4:
3104      case Iop_I32UtoFx4:
3105      case Iop_I32StoFx4:
3106      case Iop_QFtoI32Ux4_RZ:
3107      case Iop_QFtoI32Sx4_RZ:
3108      case Iop_RoundF32x4_RM:
3109      case Iop_RoundF32x4_RP:
3110      case Iop_RoundF32x4_RN:
3111      case Iop_RoundF32x4_RZ:
3112      case Iop_Recip32x4:
3113      case Iop_Abs32Fx4:
3114      case Iop_Neg32Fx4:
3115      case Iop_Rsqrte32Fx4:
3116         return unary32Fx4(mce, vatom);
3117
3118      case Iop_I32UtoFx2:
3119      case Iop_I32StoFx2:
3120      case Iop_Recip32Fx2:
3121      case Iop_Recip32x2:
3122      case Iop_Abs32Fx2:
3123      case Iop_Neg32Fx2:
3124      case Iop_Rsqrte32Fx2:
3125         return unary32Fx2(mce, vatom);
3126
3127      case Iop_Sqrt32F0x4:
3128      case Iop_RSqrt32F0x4:
3129      case Iop_Recip32F0x4:
3130         return unary32F0x4(mce, vatom);
3131
3132      case Iop_32UtoV128:
3133      case Iop_64UtoV128:
3134      case Iop_Dup8x16:
3135      case Iop_Dup16x8:
3136      case Iop_Dup32x4:
3137      case Iop_Reverse16_8x16:
3138      case Iop_Reverse32_8x16:
3139      case Iop_Reverse32_16x8:
3140      case Iop_Reverse64_8x16:
3141      case Iop_Reverse64_16x8:
3142      case Iop_Reverse64_32x4:
3143         return assignNew('V', mce, Ity_V128, unop(op, vatom));
3144
3145      case Iop_F32toF64:
3146      case Iop_I32StoF64:
3147      case Iop_I32UtoF64:
3148      case Iop_NegF64:
3149      case Iop_AbsF64:
3150      case Iop_Est5FRSqrt:
3151      case Iop_RoundF64toF64_NEAREST:
3152      case Iop_RoundF64toF64_NegINF:
3153      case Iop_RoundF64toF64_PosINF:
3154      case Iop_RoundF64toF64_ZERO:
3155      case Iop_Clz64:
3156      case Iop_Ctz64:
3157         return mkPCastTo(mce, Ity_I64, vatom);
3158
3159      case Iop_Clz32:
3160      case Iop_Ctz32:
3161      case Iop_TruncF64asF32:
3162      case Iop_NegF32:
3163      case Iop_AbsF32:
3164         return mkPCastTo(mce, Ity_I32, vatom);
3165
3166      case Iop_1Uto64:
3167      case Iop_8Uto64:
3168      case Iop_8Sto64:
3169      case Iop_16Uto64:
3170      case Iop_16Sto64:
3171      case Iop_32Sto64:
3172      case Iop_32Uto64:
3173      case Iop_V128to64:
3174      case Iop_V128HIto64:
3175      case Iop_128HIto64:
3176      case Iop_128to64:
3177      case Iop_Dup8x8:
3178      case Iop_Dup16x4:
3179      case Iop_Dup32x2:
3180      case Iop_Reverse16_8x8:
3181      case Iop_Reverse32_8x8:
3182      case Iop_Reverse32_16x4:
3183      case Iop_Reverse64_8x8:
3184      case Iop_Reverse64_16x4:
3185      case Iop_Reverse64_32x2:
3186         return assignNew('V', mce, Ity_I64, unop(op, vatom));
3187
3188      case Iop_64to32:
3189      case Iop_64HIto32:
3190      case Iop_1Uto32:
3191      case Iop_1Sto32:
3192      case Iop_8Uto32:
3193      case Iop_16Uto32:
3194      case Iop_16Sto32:
3195      case Iop_8Sto32:
3196      case Iop_V128to32:
3197         return assignNew('V', mce, Ity_I32, unop(op, vatom));
3198
3199      case Iop_8Sto16:
3200      case Iop_8Uto16:
3201      case Iop_32to16:
3202      case Iop_32HIto16:
3203      case Iop_64to16:
3204         return assignNew('V', mce, Ity_I16, unop(op, vatom));
3205
3206      case Iop_1Uto8:
3207      case Iop_16to8:
3208      case Iop_16HIto8:
3209      case Iop_32to8:
3210      case Iop_64to8:
3211         return assignNew('V', mce, Ity_I8, unop(op, vatom));
3212
3213      case Iop_32to1:
3214         return assignNew('V', mce, Ity_I1, unop(Iop_32to1, vatom));
3215
3216      case Iop_64to1:
3217         return assignNew('V', mce, Ity_I1, unop(Iop_64to1, vatom));
3218
3219      case Iop_ReinterpF64asI64:
3220      case Iop_ReinterpI64asF64:
3221      case Iop_ReinterpI32asF32:
3222      case Iop_ReinterpF32asI32:
3223      case Iop_NotV128:
3224      case Iop_Not64:
3225      case Iop_Not32:
3226      case Iop_Not16:
3227      case Iop_Not8:
3228      case Iop_Not1:
3229         return vatom;
3230
3231      case Iop_CmpNEZ8x8:
3232      case Iop_Cnt8x8:
3233      case Iop_Clz8Sx8:
3234      case Iop_Cls8Sx8:
3235      case Iop_Abs8x8:
3236         return mkPCast8x8(mce, vatom);
3237
3238      case Iop_CmpNEZ8x16:
3239      case Iop_Cnt8x16:
3240      case Iop_Clz8Sx16:
3241      case Iop_Cls8Sx16:
3242      case Iop_Abs8x16:
3243         return mkPCast8x16(mce, vatom);
3244
3245      case Iop_CmpNEZ16x4:
3246      case Iop_Clz16Sx4:
3247      case Iop_Cls16Sx4:
3248      case Iop_Abs16x4:
3249         return mkPCast16x4(mce, vatom);
3250
3251      case Iop_CmpNEZ16x8:
3252      case Iop_Clz16Sx8:
3253      case Iop_Cls16Sx8:
3254      case Iop_Abs16x8:
3255         return mkPCast16x8(mce, vatom);
3256
3257      case Iop_CmpNEZ32x2:
3258      case Iop_Clz32Sx2:
3259      case Iop_Cls32Sx2:
3260      case Iop_FtoI32Ux2_RZ:
3261      case Iop_FtoI32Sx2_RZ:
3262      case Iop_Abs32x2:
3263         return mkPCast32x2(mce, vatom);
3264
3265      case Iop_CmpNEZ32x4:
3266      case Iop_Clz32Sx4:
3267      case Iop_Cls32Sx4:
3268      case Iop_FtoI32Ux4_RZ:
3269      case Iop_FtoI32Sx4_RZ:
3270      case Iop_Abs32x4:
3271         return mkPCast32x4(mce, vatom);
3272
3273      case Iop_CmpwNEZ64:
3274         return mkPCastTo(mce, Ity_I64, vatom);
3275
3276      case Iop_CmpNEZ64x2:
3277         return mkPCast64x2(mce, vatom);
3278
3279      case Iop_Shorten16x8:
3280      case Iop_Shorten32x4:
3281      case Iop_Shorten64x2:
3282      case Iop_QShortenS16Sx8:
3283      case Iop_QShortenU16Sx8:
3284      case Iop_QShortenU16Ux8:
3285      case Iop_QShortenS32Sx4:
3286      case Iop_QShortenU32Sx4:
3287      case Iop_QShortenU32Ux4:
3288      case Iop_QShortenS64Sx2:
3289      case Iop_QShortenU64Sx2:
3290      case Iop_QShortenU64Ux2:
3291         return vectorShortenV128(mce, op, vatom);
3292
3293      case Iop_Longen8Sx8:
3294      case Iop_Longen8Ux8:
3295      case Iop_Longen16Sx4:
3296      case Iop_Longen16Ux4:
3297      case Iop_Longen32Sx2:
3298      case Iop_Longen32Ux2:
3299         return vectorLongenI64(mce, op, vatom);
3300
3301      case Iop_PwAddL32Ux2:
3302      case Iop_PwAddL32Sx2:
3303         return mkPCastTo(mce, Ity_I64,
3304               assignNew('V', mce, Ity_I64, unop(op, mkPCast32x2(mce, vatom))));
3305
3306      case Iop_PwAddL16Ux4:
3307      case Iop_PwAddL16Sx4:
3308         return mkPCast32x2(mce,
3309               assignNew('V', mce, Ity_I64, unop(op, mkPCast16x4(mce, vatom))));
3310
3311      case Iop_PwAddL8Ux8:
3312      case Iop_PwAddL8Sx8:
3313         return mkPCast16x4(mce,
3314               assignNew('V', mce, Ity_I64, unop(op, mkPCast8x8(mce, vatom))));
3315
3316      case Iop_PwAddL32Ux4:
3317      case Iop_PwAddL32Sx4:
3318         return mkPCast64x2(mce,
3319               assignNew('V', mce, Ity_V128, unop(op, mkPCast32x4(mce, vatom))));
3320
3321      case Iop_PwAddL16Ux8:
3322      case Iop_PwAddL16Sx8:
3323         return mkPCast32x4(mce,
3324               assignNew('V', mce, Ity_V128, unop(op, mkPCast16x8(mce, vatom))));
3325
3326      case Iop_PwAddL8Ux16:
3327      case Iop_PwAddL8Sx16:
3328         return mkPCast16x8(mce,
3329               assignNew('V', mce, Ity_V128, unop(op, mkPCast8x16(mce, vatom))));
3330
3331      default:
3332         ppIROp(op);
3333         VG_(tool_panic)("memcheck:expr2vbits_Unop");
3334   }
3335}
3336
3337
3338/* Worker function; do not call directly. */
3339static
3340IRAtom* expr2vbits_Load_WRK ( MCEnv* mce,
3341                              IREndness end, IRType ty,
3342                              IRAtom* addr, UInt bias )
3343{
3344   void*    helper;
3345   Char*    hname;
3346   IRDirty* di;
3347   IRTemp   datavbits;
3348   IRAtom*  addrAct;
3349
3350   tl_assert(isOriginalAtom(mce,addr));
3351   tl_assert(end == Iend_LE || end == Iend_BE);
3352
3353   /* First, emit a definedness test for the address.  This also sets
3354      the address (shadow) to 'defined' following the test. */
3355   complainIfUndefined( mce, addr );
3356
3357   /* Now cook up a call to the relevant helper function, to read the
3358      data V bits from shadow memory. */
3359   ty = shadowTypeV(ty);
3360
3361   if (end == Iend_LE) {
3362      switch (ty) {
3363         case Ity_I64: helper = &MC_(helperc_LOADV64le);
3364                       hname = "MC_(helperc_LOADV64le)";
3365                       break;
3366         case Ity_I32: helper = &MC_(helperc_LOADV32le);
3367                       hname = "MC_(helperc_LOADV32le)";
3368                       break;
3369         case Ity_I16: helper = &MC_(helperc_LOADV16le);
3370                       hname = "MC_(helperc_LOADV16le)";
3371                       break;
3372         case Ity_I8:  helper = &MC_(helperc_LOADV8);
3373                       hname = "MC_(helperc_LOADV8)";
3374                       break;
3375         default:      ppIRType(ty);
3376                       VG_(tool_panic)("memcheck:do_shadow_Load(LE)");
3377      }
3378   } else {
3379      switch (ty) {
3380         case Ity_I64: helper = &MC_(helperc_LOADV64be);
3381                       hname = "MC_(helperc_LOADV64be)";
3382                       break;
3383         case Ity_I32: helper = &MC_(helperc_LOADV32be);
3384                       hname = "MC_(helperc_LOADV32be)";
3385                       break;
3386         case Ity_I16: helper = &MC_(helperc_LOADV16be);
3387                       hname = "MC_(helperc_LOADV16be)";
3388                       break;
3389         case Ity_I8:  helper = &MC_(helperc_LOADV8);
3390                       hname = "MC_(helperc_LOADV8)";
3391                       break;
3392         default:      ppIRType(ty);
3393                       VG_(tool_panic)("memcheck:do_shadow_Load(BE)");
3394      }
3395   }
3396
3397   /* Generate the actual address into addrAct. */
3398   if (bias == 0) {
3399      addrAct = addr;
3400   } else {
3401      IROp    mkAdd;
3402      IRAtom* eBias;
3403      IRType  tyAddr  = mce->hWordTy;
3404      tl_assert( tyAddr == Ity_I32 || tyAddr == Ity_I64 );
3405      mkAdd   = tyAddr==Ity_I32 ? Iop_Add32 : Iop_Add64;
3406      eBias   = tyAddr==Ity_I32 ? mkU32(bias) : mkU64(bias);
3407      addrAct = assignNew('V', mce, tyAddr, binop(mkAdd, addr, eBias) );
3408   }
3409
3410   /* We need to have a place to park the V bits we're just about to
3411      read. */
3412   datavbits = newTemp(mce, ty, VSh);
3413   di = unsafeIRDirty_1_N( datavbits,
3414                           1/*regparms*/,
3415                           hname, VG_(fnptr_to_fnentry)( helper ),
3416                           mkIRExprVec_1( addrAct ));
3417   setHelperAnns( mce, di );
3418   stmt( 'V', mce, IRStmt_Dirty(di) );
3419
3420   return mkexpr(datavbits);
3421}
3422
3423
3424static
3425IRAtom* expr2vbits_Load ( MCEnv* mce,
3426                          IREndness end, IRType ty,
3427                          IRAtom* addr, UInt bias )
3428{
3429   IRAtom *v64hi, *v64lo;
3430   tl_assert(end == Iend_LE || end == Iend_BE);
3431   switch (shadowTypeV(ty)) {
3432      case Ity_I8:
3433      case Ity_I16:
3434      case Ity_I32:
3435      case Ity_I64:
3436         return expr2vbits_Load_WRK(mce, end, ty, addr, bias);
3437      case Ity_V128:
3438         if (end == Iend_LE) {
3439            v64lo = expr2vbits_Load_WRK(mce, end, Ity_I64, addr, bias);
3440            v64hi = expr2vbits_Load_WRK(mce, end, Ity_I64, addr, bias+8);
3441         } else {
3442            v64hi = expr2vbits_Load_WRK(mce, end, Ity_I64, addr, bias);
3443            v64lo = expr2vbits_Load_WRK(mce, end, Ity_I64, addr, bias+8);
3444         }
3445         return assignNew( 'V', mce,
3446                           Ity_V128,
3447                           binop(Iop_64HLtoV128, v64hi, v64lo));
3448      default:
3449         VG_(tool_panic)("expr2vbits_Load");
3450   }
3451}
3452
3453
3454static
3455IRAtom* expr2vbits_Mux0X ( MCEnv* mce,
3456                           IRAtom* cond, IRAtom* expr0, IRAtom* exprX )
3457{
3458   IRAtom *vbitsC, *vbits0, *vbitsX;
3459   IRType ty;
3460   /* Given Mux0X(cond,expr0,exprX), generate
3461         Mux0X(cond,expr0#,exprX#) `UifU` PCast(cond#)
3462      That is, steer the V bits like the originals, but trash the
3463      result if the steering value is undefined.  This gives
3464      lazy propagation. */
3465   tl_assert(isOriginalAtom(mce, cond));
3466   tl_assert(isOriginalAtom(mce, expr0));
3467   tl_assert(isOriginalAtom(mce, exprX));
3468
3469   vbitsC = expr2vbits(mce, cond);
3470   vbits0 = expr2vbits(mce, expr0);
3471   vbitsX = expr2vbits(mce, exprX);
3472   ty = typeOfIRExpr(mce->sb->tyenv, vbits0);
3473
3474   return
3475      mkUifU(mce, ty, assignNew('V', mce, ty,
3476                                     IRExpr_Mux0X(cond, vbits0, vbitsX)),
3477                      mkPCastTo(mce, ty, vbitsC) );
3478}
3479
3480/* --------- This is the main expression-handling function. --------- */
3481
3482static
3483IRExpr* expr2vbits ( MCEnv* mce, IRExpr* e )
3484{
3485   switch (e->tag) {
3486
3487      case Iex_Get:
3488         return shadow_GET( mce, e->Iex.Get.offset, e->Iex.Get.ty );
3489
3490      case Iex_GetI:
3491         return shadow_GETI( mce, e->Iex.GetI.descr,
3492                                  e->Iex.GetI.ix, e->Iex.GetI.bias );
3493
3494      case Iex_RdTmp:
3495         return IRExpr_RdTmp( findShadowTmpV(mce, e->Iex.RdTmp.tmp) );
3496
3497      case Iex_Const:
3498         return definedOfType(shadowTypeV(typeOfIRExpr(mce->sb->tyenv, e)));
3499
3500      case Iex_Qop:
3501         return expr2vbits_Qop(
3502                   mce,
3503                   e->Iex.Qop.op,
3504                   e->Iex.Qop.arg1, e->Iex.Qop.arg2,
3505		   e->Iex.Qop.arg3, e->Iex.Qop.arg4
3506                );
3507
3508      case Iex_Triop:
3509         return expr2vbits_Triop(
3510                   mce,
3511                   e->Iex.Triop.op,
3512                   e->Iex.Triop.arg1, e->Iex.Triop.arg2, e->Iex.Triop.arg3
3513                );
3514
3515      case Iex_Binop:
3516         return expr2vbits_Binop(
3517                   mce,
3518                   e->Iex.Binop.op,
3519                   e->Iex.Binop.arg1, e->Iex.Binop.arg2
3520                );
3521
3522      case Iex_Unop:
3523         return expr2vbits_Unop( mce, e->Iex.Unop.op, e->Iex.Unop.arg );
3524
3525      case Iex_Load:
3526         return expr2vbits_Load( mce, e->Iex.Load.end,
3527                                      e->Iex.Load.ty,
3528                                      e->Iex.Load.addr, 0/*addr bias*/ );
3529
3530      case Iex_CCall:
3531         return mkLazyN( mce, e->Iex.CCall.args,
3532                              e->Iex.CCall.retty,
3533                              e->Iex.CCall.cee );
3534
3535      case Iex_Mux0X:
3536         return expr2vbits_Mux0X( mce, e->Iex.Mux0X.cond, e->Iex.Mux0X.expr0,
3537                                       e->Iex.Mux0X.exprX);
3538
3539      default:
3540         VG_(printf)("\n");
3541         ppIRExpr(e);
3542         VG_(printf)("\n");
3543         VG_(tool_panic)("memcheck: expr2vbits");
3544   }
3545}
3546
3547/*------------------------------------------------------------*/
3548/*--- Generate shadow stmts from all kinds of IRStmts.     ---*/
3549/*------------------------------------------------------------*/
3550
3551/* Widen a value to the host word size. */
3552
3553static
3554IRExpr* zwidenToHostWord ( MCEnv* mce, IRAtom* vatom )
3555{
3556   IRType ty, tyH;
3557
3558   /* vatom is vbits-value and as such can only have a shadow type. */
3559   tl_assert(isShadowAtom(mce,vatom));
3560
3561   ty  = typeOfIRExpr(mce->sb->tyenv, vatom);
3562   tyH = mce->hWordTy;
3563
3564   if (tyH == Ity_I32) {
3565      switch (ty) {
3566         case Ity_I32:
3567            return vatom;
3568         case Ity_I16:
3569            return assignNew('V', mce, tyH, unop(Iop_16Uto32, vatom));
3570         case Ity_I8:
3571            return assignNew('V', mce, tyH, unop(Iop_8Uto32, vatom));
3572         default:
3573            goto unhandled;
3574      }
3575   } else
3576   if (tyH == Ity_I64) {
3577      switch (ty) {
3578         case Ity_I32:
3579            return assignNew('V', mce, tyH, unop(Iop_32Uto64, vatom));
3580         case Ity_I16:
3581            return assignNew('V', mce, tyH, unop(Iop_32Uto64,
3582                   assignNew('V', mce, Ity_I32, unop(Iop_16Uto32, vatom))));
3583         case Ity_I8:
3584            return assignNew('V', mce, tyH, unop(Iop_32Uto64,
3585                   assignNew('V', mce, Ity_I32, unop(Iop_8Uto32, vatom))));
3586         default:
3587            goto unhandled;
3588      }
3589   } else {
3590      goto unhandled;
3591   }
3592  unhandled:
3593   VG_(printf)("\nty = "); ppIRType(ty); VG_(printf)("\n");
3594   VG_(tool_panic)("zwidenToHostWord");
3595}
3596
3597
3598/* Generate a shadow store.  addr is always the original address atom.
3599   You can pass in either originals or V-bits for the data atom, but
3600   obviously not both.  guard :: Ity_I1 controls whether the store
3601   really happens; NULL means it unconditionally does.  Note that
3602   guard itself is not checked for definedness; the caller of this
3603   function must do that if necessary. */
3604
3605static
3606void do_shadow_Store ( MCEnv* mce,
3607                       IREndness end,
3608                       IRAtom* addr, UInt bias,
3609                       IRAtom* data, IRAtom* vdata,
3610                       IRAtom* guard )
3611{
3612   IROp     mkAdd;
3613   IRType   ty, tyAddr;
3614   void*    helper = NULL;
3615   Char*    hname = NULL;
3616   IRConst* c;
3617
3618   tyAddr = mce->hWordTy;
3619   mkAdd  = tyAddr==Ity_I32 ? Iop_Add32 : Iop_Add64;
3620   tl_assert( tyAddr == Ity_I32 || tyAddr == Ity_I64 );
3621   tl_assert( end == Iend_LE || end == Iend_BE );
3622
3623   if (data) {
3624      tl_assert(!vdata);
3625      tl_assert(isOriginalAtom(mce, data));
3626      tl_assert(bias == 0);
3627      vdata = expr2vbits( mce, data );
3628   } else {
3629      tl_assert(vdata);
3630   }
3631
3632   tl_assert(isOriginalAtom(mce,addr));
3633   tl_assert(isShadowAtom(mce,vdata));
3634
3635   if (guard) {
3636      tl_assert(isOriginalAtom(mce, guard));
3637      tl_assert(typeOfIRExpr(mce->sb->tyenv, guard) == Ity_I1);
3638   }
3639
3640   ty = typeOfIRExpr(mce->sb->tyenv, vdata);
3641
3642   // If we're not doing undefined value checking, pretend that this value
3643   // is "all valid".  That lets Vex's optimiser remove some of the V bit
3644   // shadow computation ops that precede it.
3645   if (MC_(clo_mc_level) == 1) {
3646      switch (ty) {
3647         case Ity_V128: // V128 weirdness
3648                        c = IRConst_V128(V_BITS16_DEFINED); break;
3649         case Ity_I64:  c = IRConst_U64 (V_BITS64_DEFINED); break;
3650         case Ity_I32:  c = IRConst_U32 (V_BITS32_DEFINED); break;
3651         case Ity_I16:  c = IRConst_U16 (V_BITS16_DEFINED); break;
3652         case Ity_I8:   c = IRConst_U8  (V_BITS8_DEFINED);  break;
3653         default:       VG_(tool_panic)("memcheck:do_shadow_Store(LE)");
3654      }
3655      vdata = IRExpr_Const( c );
3656   }
3657
3658   /* First, emit a definedness test for the address.  This also sets
3659      the address (shadow) to 'defined' following the test. */
3660   complainIfUndefined( mce, addr );
3661
3662   /* Now decide which helper function to call to write the data V
3663      bits into shadow memory. */
3664   if (end == Iend_LE) {
3665      switch (ty) {
3666         case Ity_V128: /* we'll use the helper twice */
3667         case Ity_I64: helper = &MC_(helperc_STOREV64le);
3668                       hname = "MC_(helperc_STOREV64le)";
3669                       break;
3670         case Ity_I32: helper = &MC_(helperc_STOREV32le);
3671                       hname = "MC_(helperc_STOREV32le)";
3672                       break;
3673         case Ity_I16: helper = &MC_(helperc_STOREV16le);
3674                       hname = "MC_(helperc_STOREV16le)";
3675                       break;
3676         case Ity_I8:  helper = &MC_(helperc_STOREV8);
3677                       hname = "MC_(helperc_STOREV8)";
3678                       break;
3679         default:      VG_(tool_panic)("memcheck:do_shadow_Store(LE)");
3680      }
3681   } else {
3682      switch (ty) {
3683         case Ity_V128: /* we'll use the helper twice */
3684         case Ity_I64: helper = &MC_(helperc_STOREV64be);
3685                       hname = "MC_(helperc_STOREV64be)";
3686                       break;
3687         case Ity_I32: helper = &MC_(helperc_STOREV32be);
3688                       hname = "MC_(helperc_STOREV32be)";
3689                       break;
3690         case Ity_I16: helper = &MC_(helperc_STOREV16be);
3691                       hname = "MC_(helperc_STOREV16be)";
3692                       break;
3693         case Ity_I8:  helper = &MC_(helperc_STOREV8);
3694                       hname = "MC_(helperc_STOREV8)";
3695                       break;
3696         default:      VG_(tool_panic)("memcheck:do_shadow_Store(BE)");
3697      }
3698   }
3699
3700   if (ty == Ity_V128) {
3701
3702      /* V128-bit case */
3703      /* See comment in next clause re 64-bit regparms */
3704      /* also, need to be careful about endianness */
3705
3706      Int     offLo64, offHi64;
3707      IRDirty *diLo64, *diHi64;
3708      IRAtom  *addrLo64, *addrHi64;
3709      IRAtom  *vdataLo64, *vdataHi64;
3710      IRAtom  *eBiasLo64, *eBiasHi64;
3711
3712      if (end == Iend_LE) {
3713         offLo64 = 0;
3714         offHi64 = 8;
3715      } else {
3716         offLo64 = 8;
3717         offHi64 = 0;
3718      }
3719
3720      eBiasLo64 = tyAddr==Ity_I32 ? mkU32(bias+offLo64) : mkU64(bias+offLo64);
3721      addrLo64  = assignNew('V', mce, tyAddr, binop(mkAdd, addr, eBiasLo64) );
3722      vdataLo64 = assignNew('V', mce, Ity_I64, unop(Iop_V128to64, vdata));
3723      diLo64    = unsafeIRDirty_0_N(
3724                     1/*regparms*/,
3725                     hname, VG_(fnptr_to_fnentry)( helper ),
3726                     mkIRExprVec_2( addrLo64, vdataLo64 )
3727                  );
3728      eBiasHi64 = tyAddr==Ity_I32 ? mkU32(bias+offHi64) : mkU64(bias+offHi64);
3729      addrHi64  = assignNew('V', mce, tyAddr, binop(mkAdd, addr, eBiasHi64) );
3730      vdataHi64 = assignNew('V', mce, Ity_I64, unop(Iop_V128HIto64, vdata));
3731      diHi64    = unsafeIRDirty_0_N(
3732                     1/*regparms*/,
3733                     hname, VG_(fnptr_to_fnentry)( helper ),
3734                     mkIRExprVec_2( addrHi64, vdataHi64 )
3735                  );
3736      if (guard) diLo64->guard = guard;
3737      if (guard) diHi64->guard = guard;
3738      setHelperAnns( mce, diLo64 );
3739      setHelperAnns( mce, diHi64 );
3740      stmt( 'V', mce, IRStmt_Dirty(diLo64) );
3741      stmt( 'V', mce, IRStmt_Dirty(diHi64) );
3742
3743   } else {
3744
3745      IRDirty *di;
3746      IRAtom  *addrAct;
3747
3748      /* 8/16/32/64-bit cases */
3749      /* Generate the actual address into addrAct. */
3750      if (bias == 0) {
3751         addrAct = addr;
3752      } else {
3753         IRAtom* eBias   = tyAddr==Ity_I32 ? mkU32(bias) : mkU64(bias);
3754         addrAct = assignNew('V', mce, tyAddr, binop(mkAdd, addr, eBias));
3755      }
3756
3757      if (ty == Ity_I64) {
3758         /* We can't do this with regparm 2 on 32-bit platforms, since
3759            the back ends aren't clever enough to handle 64-bit
3760            regparm args.  Therefore be different. */
3761         di = unsafeIRDirty_0_N(
3762                 1/*regparms*/,
3763                 hname, VG_(fnptr_to_fnentry)( helper ),
3764                 mkIRExprVec_2( addrAct, vdata )
3765              );
3766      } else {
3767         di = unsafeIRDirty_0_N(
3768                 2/*regparms*/,
3769                 hname, VG_(fnptr_to_fnentry)( helper ),
3770                 mkIRExprVec_2( addrAct,
3771                                zwidenToHostWord( mce, vdata ))
3772              );
3773      }
3774      if (guard) di->guard = guard;
3775      setHelperAnns( mce, di );
3776      stmt( 'V', mce, IRStmt_Dirty(di) );
3777   }
3778
3779}
3780
3781
3782/* Do lazy pessimistic propagation through a dirty helper call, by
3783   looking at the annotations on it.  This is the most complex part of
3784   Memcheck. */
3785
3786static IRType szToITy ( Int n )
3787{
3788   switch (n) {
3789      case 1: return Ity_I8;
3790      case 2: return Ity_I16;
3791      case 4: return Ity_I32;
3792      case 8: return Ity_I64;
3793      default: VG_(tool_panic)("szToITy(memcheck)");
3794   }
3795}
3796
3797static
3798void do_shadow_Dirty ( MCEnv* mce, IRDirty* d )
3799{
3800   Int       i, n, toDo, gSz, gOff;
3801   IRAtom    *src, *here, *curr;
3802   IRType    tySrc, tyDst;
3803   IRTemp    dst;
3804   IREndness end;
3805
3806   /* What's the native endianness?  We need to know this. */
3807#  if defined(VG_BIGENDIAN)
3808   end = Iend_BE;
3809#  elif defined(VG_LITTLEENDIAN)
3810   end = Iend_LE;
3811#  else
3812#    error "Unknown endianness"
3813#  endif
3814
3815   /* First check the guard. */
3816   complainIfUndefined(mce, d->guard);
3817
3818   /* Now round up all inputs and PCast over them. */
3819   curr = definedOfType(Ity_I32);
3820
3821   /* Inputs: unmasked args */
3822   for (i = 0; d->args[i]; i++) {
3823      if (d->cee->mcx_mask & (1<<i)) {
3824         /* ignore this arg */
3825      } else {
3826         here = mkPCastTo( mce, Ity_I32, expr2vbits(mce, d->args[i]) );
3827         curr = mkUifU32(mce, here, curr);
3828      }
3829   }
3830
3831   /* Inputs: guest state that we read. */
3832   for (i = 0; i < d->nFxState; i++) {
3833      tl_assert(d->fxState[i].fx != Ifx_None);
3834      if (d->fxState[i].fx == Ifx_Write)
3835         continue;
3836
3837      /* Ignore any sections marked as 'always defined'. */
3838      if (isAlwaysDefd(mce, d->fxState[i].offset, d->fxState[i].size )) {
3839         if (0)
3840         VG_(printf)("memcheck: Dirty gst: ignored off %d, sz %d\n",
3841                     d->fxState[i].offset, d->fxState[i].size );
3842         continue;
3843      }
3844
3845      /* This state element is read or modified.  So we need to
3846         consider it.  If larger than 8 bytes, deal with it in 8-byte
3847         chunks. */
3848      gSz  = d->fxState[i].size;
3849      gOff = d->fxState[i].offset;
3850      tl_assert(gSz > 0);
3851      while (True) {
3852         if (gSz == 0) break;
3853         n = gSz <= 8 ? gSz : 8;
3854         /* update 'curr' with UifU of the state slice
3855            gOff .. gOff+n-1 */
3856         tySrc = szToITy( n );
3857         src   = assignNew( 'V', mce, tySrc,
3858                                 shadow_GET(mce, gOff, tySrc ) );
3859         here = mkPCastTo( mce, Ity_I32, src );
3860         curr = mkUifU32(mce, here, curr);
3861         gSz -= n;
3862         gOff += n;
3863      }
3864
3865   }
3866
3867   /* Inputs: memory.  First set up some info needed regardless of
3868      whether we're doing reads or writes. */
3869
3870   if (d->mFx != Ifx_None) {
3871      /* Because we may do multiple shadow loads/stores from the same
3872         base address, it's best to do a single test of its
3873         definedness right now.  Post-instrumentation optimisation
3874         should remove all but this test. */
3875      IRType tyAddr;
3876      tl_assert(d->mAddr);
3877      complainIfUndefined(mce, d->mAddr);
3878
3879      tyAddr = typeOfIRExpr(mce->sb->tyenv, d->mAddr);
3880      tl_assert(tyAddr == Ity_I32 || tyAddr == Ity_I64);
3881      tl_assert(tyAddr == mce->hWordTy); /* not really right */
3882   }
3883
3884   /* Deal with memory inputs (reads or modifies) */
3885   if (d->mFx == Ifx_Read || d->mFx == Ifx_Modify) {
3886      toDo   = d->mSize;
3887      /* chew off 32-bit chunks.  We don't care about the endianness
3888         since it's all going to be condensed down to a single bit,
3889         but nevertheless choose an endianness which is hopefully
3890         native to the platform. */
3891      while (toDo >= 4) {
3892         here = mkPCastTo(
3893                   mce, Ity_I32,
3894                   expr2vbits_Load ( mce, end, Ity_I32,
3895                                     d->mAddr, d->mSize - toDo )
3896                );
3897         curr = mkUifU32(mce, here, curr);
3898         toDo -= 4;
3899      }
3900      /* chew off 16-bit chunks */
3901      while (toDo >= 2) {
3902         here = mkPCastTo(
3903                   mce, Ity_I32,
3904                   expr2vbits_Load ( mce, end, Ity_I16,
3905                                     d->mAddr, d->mSize - toDo )
3906                );
3907         curr = mkUifU32(mce, here, curr);
3908         toDo -= 2;
3909      }
3910      tl_assert(toDo == 0); /* also need to handle 1-byte excess */
3911   }
3912
3913   /* Whew!  So curr is a 32-bit V-value summarising pessimistically
3914      all the inputs to the helper.  Now we need to re-distribute the
3915      results to all destinations. */
3916
3917   /* Outputs: the destination temporary, if there is one. */
3918   if (d->tmp != IRTemp_INVALID) {
3919      dst   = findShadowTmpV(mce, d->tmp);
3920      tyDst = typeOfIRTemp(mce->sb->tyenv, d->tmp);
3921      assign( 'V', mce, dst, mkPCastTo( mce, tyDst, curr) );
3922   }
3923
3924   /* Outputs: guest state that we write or modify. */
3925   for (i = 0; i < d->nFxState; i++) {
3926      tl_assert(d->fxState[i].fx != Ifx_None);
3927      if (d->fxState[i].fx == Ifx_Read)
3928         continue;
3929      /* Ignore any sections marked as 'always defined'. */
3930      if (isAlwaysDefd(mce, d->fxState[i].offset, d->fxState[i].size ))
3931         continue;
3932      /* This state element is written or modified.  So we need to
3933         consider it.  If larger than 8 bytes, deal with it in 8-byte
3934         chunks. */
3935      gSz  = d->fxState[i].size;
3936      gOff = d->fxState[i].offset;
3937      tl_assert(gSz > 0);
3938      while (True) {
3939         if (gSz == 0) break;
3940         n = gSz <= 8 ? gSz : 8;
3941         /* Write suitably-casted 'curr' to the state slice
3942            gOff .. gOff+n-1 */
3943         tyDst = szToITy( n );
3944         do_shadow_PUT( mce, gOff,
3945                             NULL, /* original atom */
3946                             mkPCastTo( mce, tyDst, curr ) );
3947         gSz -= n;
3948         gOff += n;
3949      }
3950   }
3951
3952   /* Outputs: memory that we write or modify.  Same comments about
3953      endianness as above apply. */
3954   if (d->mFx == Ifx_Write || d->mFx == Ifx_Modify) {
3955      toDo   = d->mSize;
3956      /* chew off 32-bit chunks */
3957      while (toDo >= 4) {
3958         do_shadow_Store( mce, end, d->mAddr, d->mSize - toDo,
3959                          NULL, /* original data */
3960                          mkPCastTo( mce, Ity_I32, curr ),
3961                          NULL/*guard*/ );
3962         toDo -= 4;
3963      }
3964      /* chew off 16-bit chunks */
3965      while (toDo >= 2) {
3966         do_shadow_Store( mce, end, d->mAddr, d->mSize - toDo,
3967                          NULL, /* original data */
3968                          mkPCastTo( mce, Ity_I16, curr ),
3969                          NULL/*guard*/ );
3970         toDo -= 2;
3971      }
3972      tl_assert(toDo == 0); /* also need to handle 1-byte excess */
3973   }
3974
3975}
3976
3977
3978/* We have an ABI hint telling us that [base .. base+len-1] is to
3979   become undefined ("writable").  Generate code to call a helper to
3980   notify the A/V bit machinery of this fact.
3981
3982   We call
3983   void MC_(helperc_MAKE_STACK_UNINIT) ( Addr base, UWord len,
3984                                                    Addr nia );
3985*/
3986static
3987void do_AbiHint ( MCEnv* mce, IRExpr* base, Int len, IRExpr* nia )
3988{
3989   IRDirty* di;
3990   /* Minor optimisation: if not doing origin tracking, ignore the
3991      supplied nia and pass zero instead.  This is on the basis that
3992      MC_(helperc_MAKE_STACK_UNINIT) will ignore it anyway, and we can
3993      almost always generate a shorter instruction to put zero into a
3994      register than any other value. */
3995   if (MC_(clo_mc_level) < 3)
3996      nia = mkIRExpr_HWord(0);
3997
3998   di = unsafeIRDirty_0_N(
3999           0/*regparms*/,
4000           "MC_(helperc_MAKE_STACK_UNINIT)",
4001           VG_(fnptr_to_fnentry)( &MC_(helperc_MAKE_STACK_UNINIT) ),
4002           mkIRExprVec_3( base, mkIRExpr_HWord( (UInt)len), nia )
4003        );
4004   stmt( 'V', mce, IRStmt_Dirty(di) );
4005}
4006
4007
4008/* ------ Dealing with IRCAS (big and complex) ------ */
4009
4010/* FWDS */
4011static IRAtom* gen_load_b  ( MCEnv* mce, Int szB,
4012                             IRAtom* baseaddr, Int offset );
4013static IRAtom* gen_maxU32  ( MCEnv* mce, IRAtom* b1, IRAtom* b2 );
4014static void    gen_store_b ( MCEnv* mce, Int szB,
4015                             IRAtom* baseaddr, Int offset, IRAtom* dataB,
4016                             IRAtom* guard );
4017
4018static void do_shadow_CAS_single ( MCEnv* mce, IRCAS* cas );
4019static void do_shadow_CAS_double ( MCEnv* mce, IRCAS* cas );
4020
4021
4022/* Either ORIG and SHADOW are both IRExpr.RdTmps, or they are both
4023   IRExpr.Consts, else this asserts.  If they are both Consts, it
4024   doesn't do anything.  So that just leaves the RdTmp case.
4025
4026   In which case: this assigns the shadow value SHADOW to the IR
4027   shadow temporary associated with ORIG.  That is, ORIG, being an
4028   original temporary, will have a shadow temporary associated with
4029   it.  However, in the case envisaged here, there will so far have
4030   been no IR emitted to actually write a shadow value into that
4031   temporary.  What this routine does is to (emit IR to) copy the
4032   value in SHADOW into said temporary, so that after this call,
4033   IRExpr.RdTmps of ORIG's shadow temp will correctly pick up the
4034   value in SHADOW.
4035
4036   Point is to allow callers to compute "by hand" a shadow value for
4037   ORIG, and force it to be associated with ORIG.
4038
4039   How do we know that that shadow associated with ORIG has not so far
4040   been assigned to?  Well, we don't per se know that, but supposing
4041   it had.  Then this routine would create a second assignment to it,
4042   and later the IR sanity checker would barf.  But that never
4043   happens.  QED.
4044*/
4045static void bind_shadow_tmp_to_orig ( UChar how,
4046                                      MCEnv* mce,
4047                                      IRAtom* orig, IRAtom* shadow )
4048{
4049   tl_assert(isOriginalAtom(mce, orig));
4050   tl_assert(isShadowAtom(mce, shadow));
4051   switch (orig->tag) {
4052      case Iex_Const:
4053         tl_assert(shadow->tag == Iex_Const);
4054         break;
4055      case Iex_RdTmp:
4056         tl_assert(shadow->tag == Iex_RdTmp);
4057         if (how == 'V') {
4058            assign('V', mce, findShadowTmpV(mce,orig->Iex.RdTmp.tmp),
4059                   shadow);
4060         } else {
4061            tl_assert(how == 'B');
4062            assign('B', mce, findShadowTmpB(mce,orig->Iex.RdTmp.tmp),
4063                   shadow);
4064         }
4065         break;
4066      default:
4067         tl_assert(0);
4068   }
4069}
4070
4071
4072static
4073void do_shadow_CAS ( MCEnv* mce, IRCAS* cas )
4074{
4075   /* Scheme is (both single- and double- cases):
4076
4077      1. fetch data#,dataB (the proposed new value)
4078
4079      2. fetch expd#,expdB (what we expect to see at the address)
4080
4081      3. check definedness of address
4082
4083      4. load old#,oldB from shadow memory; this also checks
4084         addressibility of the address
4085
4086      5. the CAS itself
4087
4088      6. compute "expected == old".  See COMMENT_ON_CasCmpEQ below.
4089
4090      7. if "expected == old" (as computed by (6))
4091            store data#,dataB to shadow memory
4092
4093      Note that 5 reads 'old' but 4 reads 'old#'.  Similarly, 5 stores
4094      'data' but 7 stores 'data#'.  Hence it is possible for the
4095      shadow data to be incorrectly checked and/or updated:
4096
4097      * 7 is at least gated correctly, since the 'expected == old'
4098        condition is derived from outputs of 5.  However, the shadow
4099        write could happen too late: imagine after 5 we are
4100        descheduled, a different thread runs, writes a different
4101        (shadow) value at the address, and then we resume, hence
4102        overwriting the shadow value written by the other thread.
4103
4104      Because the original memory access is atomic, there's no way to
4105      make both the original and shadow accesses into a single atomic
4106      thing, hence this is unavoidable.
4107
4108      At least as Valgrind stands, I don't think it's a problem, since
4109      we're single threaded *and* we guarantee that there are no
4110      context switches during the execution of any specific superblock
4111      -- context switches can only happen at superblock boundaries.
4112
4113      If Valgrind ever becomes MT in the future, then it might be more
4114      of a problem.  A possible kludge would be to artificially
4115      associate with the location, a lock, which we must acquire and
4116      release around the transaction as a whole.  Hmm, that probably
4117      would't work properly since it only guards us against other
4118      threads doing CASs on the same location, not against other
4119      threads doing normal reads and writes.
4120
4121      ------------------------------------------------------------
4122
4123      COMMENT_ON_CasCmpEQ:
4124
4125      Note two things.  Firstly, in the sequence above, we compute
4126      "expected == old", but we don't check definedness of it.  Why
4127      not?  Also, the x86 and amd64 front ends use
4128      Iop_CmpCas{EQ,NE}{8,16,32,64} comparisons to make the equivalent
4129      determination (expected == old ?) for themselves, and we also
4130      don't check definedness for those primops; we just say that the
4131      result is defined.  Why?  Details follow.
4132
4133      x86/amd64 contains various forms of locked insns:
4134      * lock prefix before all basic arithmetic insn;
4135        eg lock xorl %reg1,(%reg2)
4136      * atomic exchange reg-mem
4137      * compare-and-swaps
4138
4139      Rather than attempt to represent them all, which would be a
4140      royal PITA, I used a result from Maurice Herlihy
4141      (http://en.wikipedia.org/wiki/Maurice_Herlihy), in which he
4142      demonstrates that compare-and-swap is a primitive more general
4143      than the other two, and so can be used to represent all of them.
4144      So the translation scheme for (eg) lock incl (%reg) is as
4145      follows:
4146
4147        again:
4148         old = * %reg
4149         new = old + 1
4150         atomically { if (* %reg == old) { * %reg = new } else { goto again } }
4151
4152      The "atomically" is the CAS bit.  The scheme is always the same:
4153      get old value from memory, compute new value, atomically stuff
4154      new value back in memory iff the old value has not changed (iow,
4155      no other thread modified it in the meantime).  If it has changed
4156      then we've been out-raced and we have to start over.
4157
4158      Now that's all very neat, but it has the bad side effect of
4159      introducing an explicit equality test into the translation.
4160      Consider the behaviour of said code on a memory location which
4161      is uninitialised.  We will wind up doing a comparison on
4162      uninitialised data, and mc duly complains.
4163
4164      What's difficult about this is, the common case is that the
4165      location is uncontended, and so we're usually comparing the same
4166      value (* %reg) with itself.  So we shouldn't complain even if it
4167      is undefined.  But mc doesn't know that.
4168
4169      My solution is to mark the == in the IR specially, so as to tell
4170      mc that it almost certainly compares a value with itself, and we
4171      should just regard the result as always defined.  Rather than
4172      add a bit to all IROps, I just cloned Iop_CmpEQ{8,16,32,64} into
4173      Iop_CasCmpEQ{8,16,32,64} so as not to disturb anything else.
4174
4175      So there's always the question of, can this give a false
4176      negative?  eg, imagine that initially, * %reg is defined; and we
4177      read that; but then in the gap between the read and the CAS, a
4178      different thread writes an undefined (and different) value at
4179      the location.  Then the CAS in this thread will fail and we will
4180      go back to "again:", but without knowing that the trip back
4181      there was based on an undefined comparison.  No matter; at least
4182      the other thread won the race and the location is correctly
4183      marked as undefined.  What if it wrote an uninitialised version
4184      of the same value that was there originally, though?
4185
4186      etc etc.  Seems like there's a small corner case in which we
4187      might lose the fact that something's defined -- we're out-raced
4188      in between the "old = * reg" and the "atomically {", _and_ the
4189      other thread is writing in an undefined version of what's
4190      already there.  Well, that seems pretty unlikely.
4191
4192      ---
4193
4194      If we ever need to reinstate it .. code which generates a
4195      definedness test for "expected == old" was removed at r10432 of
4196      this file.
4197   */
4198   if (cas->oldHi == IRTemp_INVALID) {
4199      do_shadow_CAS_single( mce, cas );
4200   } else {
4201      do_shadow_CAS_double( mce, cas );
4202   }
4203}
4204
4205
4206static void do_shadow_CAS_single ( MCEnv* mce, IRCAS* cas )
4207{
4208   IRAtom *vdataLo = NULL, *bdataLo = NULL;
4209   IRAtom *vexpdLo = NULL, *bexpdLo = NULL;
4210   IRAtom *voldLo  = NULL, *boldLo  = NULL;
4211   IRAtom *expd_eq_old = NULL;
4212   IROp   opCasCmpEQ;
4213   Int    elemSzB;
4214   IRType elemTy;
4215   Bool   otrak = MC_(clo_mc_level) >= 3; /* a shorthand */
4216
4217   /* single CAS */
4218   tl_assert(cas->oldHi == IRTemp_INVALID);
4219   tl_assert(cas->expdHi == NULL);
4220   tl_assert(cas->dataHi == NULL);
4221
4222   elemTy = typeOfIRExpr(mce->sb->tyenv, cas->expdLo);
4223   switch (elemTy) {
4224      case Ity_I8:  elemSzB = 1; opCasCmpEQ = Iop_CasCmpEQ8;  break;
4225      case Ity_I16: elemSzB = 2; opCasCmpEQ = Iop_CasCmpEQ16; break;
4226      case Ity_I32: elemSzB = 4; opCasCmpEQ = Iop_CasCmpEQ32; break;
4227      case Ity_I64: elemSzB = 8; opCasCmpEQ = Iop_CasCmpEQ64; break;
4228      default: tl_assert(0); /* IR defn disallows any other types */
4229   }
4230
4231   /* 1. fetch data# (the proposed new value) */
4232   tl_assert(isOriginalAtom(mce, cas->dataLo));
4233   vdataLo
4234      = assignNew('V', mce, elemTy, expr2vbits(mce, cas->dataLo));
4235   tl_assert(isShadowAtom(mce, vdataLo));
4236   if (otrak) {
4237      bdataLo
4238         = assignNew('B', mce, Ity_I32, schemeE(mce, cas->dataLo));
4239      tl_assert(isShadowAtom(mce, bdataLo));
4240   }
4241
4242   /* 2. fetch expected# (what we expect to see at the address) */
4243   tl_assert(isOriginalAtom(mce, cas->expdLo));
4244   vexpdLo
4245      = assignNew('V', mce, elemTy, expr2vbits(mce, cas->expdLo));
4246   tl_assert(isShadowAtom(mce, vexpdLo));
4247   if (otrak) {
4248      bexpdLo
4249         = assignNew('B', mce, Ity_I32, schemeE(mce, cas->expdLo));
4250      tl_assert(isShadowAtom(mce, bexpdLo));
4251   }
4252
4253   /* 3. check definedness of address */
4254   /* 4. fetch old# from shadow memory; this also checks
4255         addressibility of the address */
4256   voldLo
4257      = assignNew(
4258           'V', mce, elemTy,
4259           expr2vbits_Load(
4260              mce,
4261              cas->end, elemTy, cas->addr, 0/*Addr bias*/
4262        ));
4263   bind_shadow_tmp_to_orig('V', mce, mkexpr(cas->oldLo), voldLo);
4264   if (otrak) {
4265      boldLo
4266         = assignNew('B', mce, Ity_I32,
4267                     gen_load_b(mce, elemSzB, cas->addr, 0/*addr bias*/));
4268      bind_shadow_tmp_to_orig('B', mce, mkexpr(cas->oldLo), boldLo);
4269   }
4270
4271   /* 5. the CAS itself */
4272   stmt( 'C', mce, IRStmt_CAS(cas) );
4273
4274   /* 6. compute "expected == old" */
4275   /* See COMMENT_ON_CasCmpEQ in this file background/rationale. */
4276   /* Note that 'C' is kinda faking it; it is indeed a non-shadow
4277      tree, but it's not copied from the input block. */
4278   expd_eq_old
4279      = assignNew('C', mce, Ity_I1,
4280                  binop(opCasCmpEQ, cas->expdLo, mkexpr(cas->oldLo)));
4281
4282   /* 7. if "expected == old"
4283            store data# to shadow memory */
4284   do_shadow_Store( mce, cas->end, cas->addr, 0/*bias*/,
4285                    NULL/*data*/, vdataLo/*vdata*/,
4286                    expd_eq_old/*guard for store*/ );
4287   if (otrak) {
4288      gen_store_b( mce, elemSzB, cas->addr, 0/*offset*/,
4289                   bdataLo/*bdata*/,
4290                   expd_eq_old/*guard for store*/ );
4291   }
4292}
4293
4294
4295static void do_shadow_CAS_double ( MCEnv* mce, IRCAS* cas )
4296{
4297   IRAtom *vdataHi = NULL, *bdataHi = NULL;
4298   IRAtom *vdataLo = NULL, *bdataLo = NULL;
4299   IRAtom *vexpdHi = NULL, *bexpdHi = NULL;
4300   IRAtom *vexpdLo = NULL, *bexpdLo = NULL;
4301   IRAtom *voldHi  = NULL, *boldHi  = NULL;
4302   IRAtom *voldLo  = NULL, *boldLo  = NULL;
4303   IRAtom *xHi = NULL, *xLo = NULL, *xHL = NULL;
4304   IRAtom *expd_eq_old = NULL, *zero = NULL;
4305   IROp   opCasCmpEQ, opOr, opXor;
4306   Int    elemSzB, memOffsLo, memOffsHi;
4307   IRType elemTy;
4308   Bool   otrak = MC_(clo_mc_level) >= 3; /* a shorthand */
4309
4310   /* double CAS */
4311   tl_assert(cas->oldHi != IRTemp_INVALID);
4312   tl_assert(cas->expdHi != NULL);
4313   tl_assert(cas->dataHi != NULL);
4314
4315   elemTy = typeOfIRExpr(mce->sb->tyenv, cas->expdLo);
4316   switch (elemTy) {
4317      case Ity_I8:
4318         opCasCmpEQ = Iop_CasCmpEQ8; opOr = Iop_Or8; opXor = Iop_Xor8;
4319         elemSzB = 1; zero = mkU8(0);
4320         break;
4321      case Ity_I16:
4322         opCasCmpEQ = Iop_CasCmpEQ16; opOr = Iop_Or16; opXor = Iop_Xor16;
4323         elemSzB = 2; zero = mkU16(0);
4324         break;
4325      case Ity_I32:
4326         opCasCmpEQ = Iop_CasCmpEQ32; opOr = Iop_Or32; opXor = Iop_Xor32;
4327         elemSzB = 4; zero = mkU32(0);
4328         break;
4329      case Ity_I64:
4330         opCasCmpEQ = Iop_CasCmpEQ64; opOr = Iop_Or64; opXor = Iop_Xor64;
4331         elemSzB = 8; zero = mkU64(0);
4332         break;
4333      default:
4334         tl_assert(0); /* IR defn disallows any other types */
4335   }
4336
4337   /* 1. fetch data# (the proposed new value) */
4338   tl_assert(isOriginalAtom(mce, cas->dataHi));
4339   tl_assert(isOriginalAtom(mce, cas->dataLo));
4340   vdataHi
4341      = assignNew('V', mce, elemTy, expr2vbits(mce, cas->dataHi));
4342   vdataLo
4343      = assignNew('V', mce, elemTy, expr2vbits(mce, cas->dataLo));
4344   tl_assert(isShadowAtom(mce, vdataHi));
4345   tl_assert(isShadowAtom(mce, vdataLo));
4346   if (otrak) {
4347      bdataHi
4348         = assignNew('B', mce, Ity_I32, schemeE(mce, cas->dataHi));
4349      bdataLo
4350         = assignNew('B', mce, Ity_I32, schemeE(mce, cas->dataLo));
4351      tl_assert(isShadowAtom(mce, bdataHi));
4352      tl_assert(isShadowAtom(mce, bdataLo));
4353   }
4354
4355   /* 2. fetch expected# (what we expect to see at the address) */
4356   tl_assert(isOriginalAtom(mce, cas->expdHi));
4357   tl_assert(isOriginalAtom(mce, cas->expdLo));
4358   vexpdHi
4359      = assignNew('V', mce, elemTy, expr2vbits(mce, cas->expdHi));
4360   vexpdLo
4361      = assignNew('V', mce, elemTy, expr2vbits(mce, cas->expdLo));
4362   tl_assert(isShadowAtom(mce, vexpdHi));
4363   tl_assert(isShadowAtom(mce, vexpdLo));
4364   if (otrak) {
4365      bexpdHi
4366         = assignNew('B', mce, Ity_I32, schemeE(mce, cas->expdHi));
4367      bexpdLo
4368         = assignNew('B', mce, Ity_I32, schemeE(mce, cas->expdLo));
4369      tl_assert(isShadowAtom(mce, bexpdHi));
4370      tl_assert(isShadowAtom(mce, bexpdLo));
4371   }
4372
4373   /* 3. check definedness of address */
4374   /* 4. fetch old# from shadow memory; this also checks
4375         addressibility of the address */
4376   if (cas->end == Iend_LE) {
4377      memOffsLo = 0;
4378      memOffsHi = elemSzB;
4379   } else {
4380      tl_assert(cas->end == Iend_BE);
4381      memOffsLo = elemSzB;
4382      memOffsHi = 0;
4383   }
4384   voldHi
4385      = assignNew(
4386           'V', mce, elemTy,
4387           expr2vbits_Load(
4388              mce,
4389              cas->end, elemTy, cas->addr, memOffsHi/*Addr bias*/
4390        ));
4391   voldLo
4392      = assignNew(
4393           'V', mce, elemTy,
4394           expr2vbits_Load(
4395              mce,
4396              cas->end, elemTy, cas->addr, memOffsLo/*Addr bias*/
4397        ));
4398   bind_shadow_tmp_to_orig('V', mce, mkexpr(cas->oldHi), voldHi);
4399   bind_shadow_tmp_to_orig('V', mce, mkexpr(cas->oldLo), voldLo);
4400   if (otrak) {
4401      boldHi
4402         = assignNew('B', mce, Ity_I32,
4403                     gen_load_b(mce, elemSzB, cas->addr,
4404                                memOffsHi/*addr bias*/));
4405      boldLo
4406         = assignNew('B', mce, Ity_I32,
4407                     gen_load_b(mce, elemSzB, cas->addr,
4408                                memOffsLo/*addr bias*/));
4409      bind_shadow_tmp_to_orig('B', mce, mkexpr(cas->oldHi), boldHi);
4410      bind_shadow_tmp_to_orig('B', mce, mkexpr(cas->oldLo), boldLo);
4411   }
4412
4413   /* 5. the CAS itself */
4414   stmt( 'C', mce, IRStmt_CAS(cas) );
4415
4416   /* 6. compute "expected == old" */
4417   /* See COMMENT_ON_CasCmpEQ in this file background/rationale. */
4418   /* Note that 'C' is kinda faking it; it is indeed a non-shadow
4419      tree, but it's not copied from the input block. */
4420   /*
4421      xHi = oldHi ^ expdHi;
4422      xLo = oldLo ^ expdLo;
4423      xHL = xHi | xLo;
4424      expd_eq_old = xHL == 0;
4425   */
4426   xHi = assignNew('C', mce, elemTy,
4427                   binop(opXor, cas->expdHi, mkexpr(cas->oldHi)));
4428   xLo = assignNew('C', mce, elemTy,
4429                   binop(opXor, cas->expdLo, mkexpr(cas->oldLo)));
4430   xHL = assignNew('C', mce, elemTy,
4431                   binop(opOr, xHi, xLo));
4432   expd_eq_old
4433      = assignNew('C', mce, Ity_I1,
4434                  binop(opCasCmpEQ, xHL, zero));
4435
4436   /* 7. if "expected == old"
4437            store data# to shadow memory */
4438   do_shadow_Store( mce, cas->end, cas->addr, memOffsHi/*bias*/,
4439                    NULL/*data*/, vdataHi/*vdata*/,
4440                    expd_eq_old/*guard for store*/ );
4441   do_shadow_Store( mce, cas->end, cas->addr, memOffsLo/*bias*/,
4442                    NULL/*data*/, vdataLo/*vdata*/,
4443                    expd_eq_old/*guard for store*/ );
4444   if (otrak) {
4445      gen_store_b( mce, elemSzB, cas->addr, memOffsHi/*offset*/,
4446                   bdataHi/*bdata*/,
4447                   expd_eq_old/*guard for store*/ );
4448      gen_store_b( mce, elemSzB, cas->addr, memOffsLo/*offset*/,
4449                   bdataLo/*bdata*/,
4450                   expd_eq_old/*guard for store*/ );
4451   }
4452}
4453
4454
4455/* ------ Dealing with LL/SC (not difficult) ------ */
4456
4457static void do_shadow_LLSC ( MCEnv*    mce,
4458                             IREndness stEnd,
4459                             IRTemp    stResult,
4460                             IRExpr*   stAddr,
4461                             IRExpr*   stStoredata )
4462{
4463   /* In short: treat a load-linked like a normal load followed by an
4464      assignment of the loaded (shadow) data to the result temporary.
4465      Treat a store-conditional like a normal store, and mark the
4466      result temporary as defined. */
4467   IRType resTy  = typeOfIRTemp(mce->sb->tyenv, stResult);
4468   IRTemp resTmp = findShadowTmpV(mce, stResult);
4469
4470   tl_assert(isIRAtom(stAddr));
4471   if (stStoredata)
4472      tl_assert(isIRAtom(stStoredata));
4473
4474   if (stStoredata == NULL) {
4475      /* Load Linked */
4476      /* Just treat this as a normal load, followed by an assignment of
4477         the value to .result. */
4478      /* Stay sane */
4479      tl_assert(resTy == Ity_I64 || resTy == Ity_I32
4480                || resTy == Ity_I16 || resTy == Ity_I8);
4481      assign( 'V', mce, resTmp,
4482                   expr2vbits_Load(
4483                      mce, stEnd, resTy, stAddr, 0/*addr bias*/));
4484   } else {
4485      /* Store Conditional */
4486      /* Stay sane */
4487      IRType dataTy = typeOfIRExpr(mce->sb->tyenv,
4488                                   stStoredata);
4489      tl_assert(dataTy == Ity_I64 || dataTy == Ity_I32
4490                || dataTy == Ity_I16 || dataTy == Ity_I8);
4491      do_shadow_Store( mce, stEnd,
4492                            stAddr, 0/* addr bias */,
4493                            stStoredata,
4494                            NULL /* shadow data */,
4495                            NULL/*guard*/ );
4496      /* This is a store conditional, so it writes to .result a value
4497         indicating whether or not the store succeeded.  Just claim
4498         this value is always defined.  In the PowerPC interpretation
4499         of store-conditional, definedness of the success indication
4500         depends on whether the address of the store matches the
4501         reservation address.  But we can't tell that here (and
4502         anyway, we're not being PowerPC-specific).  At least we are
4503         guaranteed that the definedness of the store address, and its
4504         addressibility, will be checked as per normal.  So it seems
4505         pretty safe to just say that the success indication is always
4506         defined.
4507
4508         In schemeS, for origin tracking, we must correspondingly set
4509         a no-origin value for the origin shadow of .result.
4510      */
4511      tl_assert(resTy == Ity_I1);
4512      assign( 'V', mce, resTmp, definedOfType(resTy) );
4513   }
4514}
4515
4516
4517/*------------------------------------------------------------*/
4518/*--- Memcheck main                                        ---*/
4519/*------------------------------------------------------------*/
4520
4521static void schemeS ( MCEnv* mce, IRStmt* st );
4522
4523static Bool isBogusAtom ( IRAtom* at )
4524{
4525   ULong n = 0;
4526   IRConst* con;
4527   tl_assert(isIRAtom(at));
4528   if (at->tag == Iex_RdTmp)
4529      return False;
4530   tl_assert(at->tag == Iex_Const);
4531   con = at->Iex.Const.con;
4532   switch (con->tag) {
4533      case Ico_U1:   return False;
4534      case Ico_U8:   n = (ULong)con->Ico.U8; break;
4535      case Ico_U16:  n = (ULong)con->Ico.U16; break;
4536      case Ico_U32:  n = (ULong)con->Ico.U32; break;
4537      case Ico_U64:  n = (ULong)con->Ico.U64; break;
4538      case Ico_F64:  return False;
4539      case Ico_F64i: return False;
4540      case Ico_V128: return False;
4541      default: ppIRExpr(at); tl_assert(0);
4542   }
4543   /* VG_(printf)("%llx\n", n); */
4544   return (/*32*/    n == 0xFEFEFEFFULL
4545           /*32*/ || n == 0x80808080ULL
4546           /*32*/ || n == 0x7F7F7F7FULL
4547           /*64*/ || n == 0xFFFFFFFFFEFEFEFFULL
4548           /*64*/ || n == 0xFEFEFEFEFEFEFEFFULL
4549           /*64*/ || n == 0x0000000000008080ULL
4550           /*64*/ || n == 0x8080808080808080ULL
4551           /*64*/ || n == 0x0101010101010101ULL
4552          );
4553}
4554
4555static Bool checkForBogusLiterals ( /*FLAT*/ IRStmt* st )
4556{
4557   Int      i;
4558   IRExpr*  e;
4559   IRDirty* d;
4560   IRCAS*   cas;
4561   switch (st->tag) {
4562      case Ist_WrTmp:
4563         e = st->Ist.WrTmp.data;
4564         switch (e->tag) {
4565            case Iex_Get:
4566            case Iex_RdTmp:
4567               return False;
4568            case Iex_Const:
4569               return isBogusAtom(e);
4570            case Iex_Unop:
4571               return isBogusAtom(e->Iex.Unop.arg);
4572            case Iex_GetI:
4573               return isBogusAtom(e->Iex.GetI.ix);
4574            case Iex_Binop:
4575               return isBogusAtom(e->Iex.Binop.arg1)
4576                      || isBogusAtom(e->Iex.Binop.arg2);
4577            case Iex_Triop:
4578               return isBogusAtom(e->Iex.Triop.arg1)
4579                      || isBogusAtom(e->Iex.Triop.arg2)
4580                      || isBogusAtom(e->Iex.Triop.arg3);
4581            case Iex_Qop:
4582               return isBogusAtom(e->Iex.Qop.arg1)
4583                      || isBogusAtom(e->Iex.Qop.arg2)
4584                      || isBogusAtom(e->Iex.Qop.arg3)
4585                      || isBogusAtom(e->Iex.Qop.arg4);
4586            case Iex_Mux0X:
4587               return isBogusAtom(e->Iex.Mux0X.cond)
4588                      || isBogusAtom(e->Iex.Mux0X.expr0)
4589                      || isBogusAtom(e->Iex.Mux0X.exprX);
4590            case Iex_Load:
4591               return isBogusAtom(e->Iex.Load.addr);
4592            case Iex_CCall:
4593               for (i = 0; e->Iex.CCall.args[i]; i++)
4594                  if (isBogusAtom(e->Iex.CCall.args[i]))
4595                     return True;
4596               return False;
4597            default:
4598               goto unhandled;
4599         }
4600      case Ist_Dirty:
4601         d = st->Ist.Dirty.details;
4602         for (i = 0; d->args[i]; i++)
4603            if (isBogusAtom(d->args[i]))
4604               return True;
4605         if (d->guard && isBogusAtom(d->guard))
4606            return True;
4607         if (d->mAddr && isBogusAtom(d->mAddr))
4608            return True;
4609         return False;
4610      case Ist_Put:
4611         return isBogusAtom(st->Ist.Put.data);
4612      case Ist_PutI:
4613         return isBogusAtom(st->Ist.PutI.ix)
4614                || isBogusAtom(st->Ist.PutI.data);
4615      case Ist_Store:
4616         return isBogusAtom(st->Ist.Store.addr)
4617                || isBogusAtom(st->Ist.Store.data);
4618      case Ist_Exit:
4619         return isBogusAtom(st->Ist.Exit.guard);
4620      case Ist_AbiHint:
4621         return isBogusAtom(st->Ist.AbiHint.base)
4622                || isBogusAtom(st->Ist.AbiHint.nia);
4623      case Ist_NoOp:
4624      case Ist_IMark:
4625      case Ist_MBE:
4626         return False;
4627      case Ist_CAS:
4628         cas = st->Ist.CAS.details;
4629         return isBogusAtom(cas->addr)
4630                || (cas->expdHi ? isBogusAtom(cas->expdHi) : False)
4631                || isBogusAtom(cas->expdLo)
4632                || (cas->dataHi ? isBogusAtom(cas->dataHi) : False)
4633                || isBogusAtom(cas->dataLo);
4634      case Ist_LLSC:
4635         return isBogusAtom(st->Ist.LLSC.addr)
4636                || (st->Ist.LLSC.storedata
4637                       ? isBogusAtom(st->Ist.LLSC.storedata)
4638                       : False);
4639      default:
4640      unhandled:
4641         ppIRStmt(st);
4642         VG_(tool_panic)("hasBogusLiterals");
4643   }
4644}
4645
4646
4647IRSB* MC_(instrument) ( VgCallbackClosure* closure,
4648                        IRSB* sb_in,
4649                        VexGuestLayout* layout,
4650                        VexGuestExtents* vge,
4651                        IRType gWordTy, IRType hWordTy )
4652{
4653   Bool    verboze = 0||False;
4654   Bool    bogus;
4655   Int     i, j, first_stmt;
4656   IRStmt* st;
4657   MCEnv   mce;
4658   IRSB*   sb_out;
4659
4660   if (gWordTy != hWordTy) {
4661      /* We don't currently support this case. */
4662      VG_(tool_panic)("host/guest word size mismatch");
4663   }
4664
4665   /* Check we're not completely nuts */
4666   tl_assert(sizeof(UWord)  == sizeof(void*));
4667   tl_assert(sizeof(Word)   == sizeof(void*));
4668   tl_assert(sizeof(Addr)   == sizeof(void*));
4669   tl_assert(sizeof(ULong)  == 8);
4670   tl_assert(sizeof(Long)   == 8);
4671   tl_assert(sizeof(Addr64) == 8);
4672   tl_assert(sizeof(UInt)   == 4);
4673   tl_assert(sizeof(Int)    == 4);
4674
4675   tl_assert(MC_(clo_mc_level) >= 1 && MC_(clo_mc_level) <= 3);
4676
4677   /* Set up SB */
4678   sb_out = deepCopyIRSBExceptStmts(sb_in);
4679
4680   /* Set up the running environment.  Both .sb and .tmpMap are
4681      modified as we go along.  Note that tmps are added to both
4682      .sb->tyenv and .tmpMap together, so the valid index-set for
4683      those two arrays should always be identical. */
4684   VG_(memset)(&mce, 0, sizeof(mce));
4685   mce.sb             = sb_out;
4686   mce.trace          = verboze;
4687   mce.layout         = layout;
4688   mce.hWordTy        = hWordTy;
4689   mce.bogusLiterals  = False;
4690
4691   mce.tmpMap = VG_(newXA)( VG_(malloc), "mc.MC_(instrument).1", VG_(free),
4692                            sizeof(TempMapEnt));
4693   for (i = 0; i < sb_in->tyenv->types_used; i++) {
4694      TempMapEnt ent;
4695      ent.kind    = Orig;
4696      ent.shadowV = IRTemp_INVALID;
4697      ent.shadowB = IRTemp_INVALID;
4698      VG_(addToXA)( mce.tmpMap, &ent );
4699   }
4700   tl_assert( VG_(sizeXA)( mce.tmpMap ) == sb_in->tyenv->types_used );
4701
4702   /* Make a preliminary inspection of the statements, to see if there
4703      are any dodgy-looking literals.  If there are, we generate
4704      extra-detailed (hence extra-expensive) instrumentation in
4705      places.  Scan the whole bb even if dodgyness is found earlier,
4706      so that the flatness assertion is applied to all stmts. */
4707
4708   bogus = False;
4709
4710   for (i = 0; i < sb_in->stmts_used; i++) {
4711
4712      st = sb_in->stmts[i];
4713      tl_assert(st);
4714      tl_assert(isFlatIRStmt(st));
4715
4716      if (!bogus) {
4717         bogus = checkForBogusLiterals(st);
4718         if (0 && bogus) {
4719            VG_(printf)("bogus: ");
4720            ppIRStmt(st);
4721            VG_(printf)("\n");
4722         }
4723      }
4724
4725   }
4726
4727   mce.bogusLiterals = bogus;
4728
4729   /* Copy verbatim any IR preamble preceding the first IMark */
4730
4731   tl_assert(mce.sb == sb_out);
4732   tl_assert(mce.sb != sb_in);
4733
4734   i = 0;
4735   while (i < sb_in->stmts_used && sb_in->stmts[i]->tag != Ist_IMark) {
4736
4737      st = sb_in->stmts[i];
4738      tl_assert(st);
4739      tl_assert(isFlatIRStmt(st));
4740
4741      stmt( 'C', &mce, sb_in->stmts[i] );
4742      i++;
4743   }
4744
4745   /* Nasty problem.  IR optimisation of the pre-instrumented IR may
4746      cause the IR following the preamble to contain references to IR
4747      temporaries defined in the preamble.  Because the preamble isn't
4748      instrumented, these temporaries don't have any shadows.
4749      Nevertheless uses of them following the preamble will cause
4750      memcheck to generate references to their shadows.  End effect is
4751      to cause IR sanity check failures, due to references to
4752      non-existent shadows.  This is only evident for the complex
4753      preambles used for function wrapping on TOC-afflicted platforms
4754      (ppc64-linux, ppc32-aix5, ppc64-aix5).
4755
4756      The following loop therefore scans the preamble looking for
4757      assignments to temporaries.  For each one found it creates an
4758      assignment to the corresponding (V) shadow temp, marking it as
4759      'defined'.  This is the same resulting IR as if the main
4760      instrumentation loop before had been applied to the statement
4761      'tmp = CONSTANT'.
4762
4763      Similarly, if origin tracking is enabled, we must generate an
4764      assignment for the corresponding origin (B) shadow, claiming
4765      no-origin, as appropriate for a defined value.
4766   */
4767   for (j = 0; j < i; j++) {
4768      if (sb_in->stmts[j]->tag == Ist_WrTmp) {
4769         /* findShadowTmpV checks its arg is an original tmp;
4770            no need to assert that here. */
4771         IRTemp tmp_o = sb_in->stmts[j]->Ist.WrTmp.tmp;
4772         IRTemp tmp_v = findShadowTmpV(&mce, tmp_o);
4773         IRType ty_v  = typeOfIRTemp(sb_out->tyenv, tmp_v);
4774         assign( 'V', &mce, tmp_v, definedOfType( ty_v ) );
4775         if (MC_(clo_mc_level) == 3) {
4776            IRTemp tmp_b = findShadowTmpB(&mce, tmp_o);
4777            tl_assert(typeOfIRTemp(sb_out->tyenv, tmp_b) == Ity_I32);
4778            assign( 'B', &mce, tmp_b, mkU32(0)/* UNKNOWN ORIGIN */);
4779         }
4780         if (0) {
4781            VG_(printf)("create shadow tmp(s) for preamble tmp [%d] ty ", j);
4782            ppIRType( ty_v );
4783            VG_(printf)("\n");
4784         }
4785      }
4786   }
4787
4788   /* Iterate over the remaining stmts to generate instrumentation. */
4789
4790   tl_assert(sb_in->stmts_used > 0);
4791   tl_assert(i >= 0);
4792   tl_assert(i < sb_in->stmts_used);
4793   tl_assert(sb_in->stmts[i]->tag == Ist_IMark);
4794
4795   for (/* use current i*/; i < sb_in->stmts_used; i++) {
4796
4797      st = sb_in->stmts[i];
4798      first_stmt = sb_out->stmts_used;
4799
4800      if (verboze) {
4801         VG_(printf)("\n");
4802         ppIRStmt(st);
4803         VG_(printf)("\n");
4804      }
4805
4806      if (MC_(clo_mc_level) == 3) {
4807         /* See comments on case Ist_CAS below. */
4808         if (st->tag != Ist_CAS)
4809            schemeS( &mce, st );
4810      }
4811
4812      /* Generate instrumentation code for each stmt ... */
4813
4814      switch (st->tag) {
4815
4816         case Ist_WrTmp:
4817            assign( 'V', &mce, findShadowTmpV(&mce, st->Ist.WrTmp.tmp),
4818                               expr2vbits( &mce, st->Ist.WrTmp.data) );
4819            break;
4820
4821         case Ist_Put:
4822            do_shadow_PUT( &mce,
4823                           st->Ist.Put.offset,
4824                           st->Ist.Put.data,
4825                           NULL /* shadow atom */ );
4826            break;
4827
4828         case Ist_PutI:
4829            do_shadow_PUTI( &mce,
4830                            st->Ist.PutI.descr,
4831                            st->Ist.PutI.ix,
4832                            st->Ist.PutI.bias,
4833                            st->Ist.PutI.data );
4834            break;
4835
4836         case Ist_Store:
4837            do_shadow_Store( &mce, st->Ist.Store.end,
4838                                   st->Ist.Store.addr, 0/* addr bias */,
4839                                   st->Ist.Store.data,
4840                                   NULL /* shadow data */,
4841                                   NULL/*guard*/ );
4842            break;
4843
4844         case Ist_Exit:
4845            complainIfUndefined( &mce, st->Ist.Exit.guard );
4846            break;
4847
4848         case Ist_IMark:
4849            break;
4850
4851         case Ist_NoOp:
4852         case Ist_MBE:
4853            break;
4854
4855         case Ist_Dirty:
4856            do_shadow_Dirty( &mce, st->Ist.Dirty.details );
4857            break;
4858
4859         case Ist_AbiHint:
4860            do_AbiHint( &mce, st->Ist.AbiHint.base,
4861                              st->Ist.AbiHint.len,
4862                              st->Ist.AbiHint.nia );
4863            break;
4864
4865         case Ist_CAS:
4866            do_shadow_CAS( &mce, st->Ist.CAS.details );
4867            /* Note, do_shadow_CAS copies the CAS itself to the output
4868               block, because it needs to add instrumentation both
4869               before and after it.  Hence skip the copy below.  Also
4870               skip the origin-tracking stuff (call to schemeS) above,
4871               since that's all tangled up with it too; do_shadow_CAS
4872               does it all. */
4873            break;
4874
4875         case Ist_LLSC:
4876            do_shadow_LLSC( &mce,
4877                            st->Ist.LLSC.end,
4878                            st->Ist.LLSC.result,
4879                            st->Ist.LLSC.addr,
4880                            st->Ist.LLSC.storedata );
4881            break;
4882
4883         default:
4884            VG_(printf)("\n");
4885            ppIRStmt(st);
4886            VG_(printf)("\n");
4887            VG_(tool_panic)("memcheck: unhandled IRStmt");
4888
4889      } /* switch (st->tag) */
4890
4891      if (0 && verboze) {
4892         for (j = first_stmt; j < sb_out->stmts_used; j++) {
4893            VG_(printf)("   ");
4894            ppIRStmt(sb_out->stmts[j]);
4895            VG_(printf)("\n");
4896         }
4897         VG_(printf)("\n");
4898      }
4899
4900      /* ... and finally copy the stmt itself to the output.  Except,
4901         skip the copy of IRCASs; see comments on case Ist_CAS
4902         above. */
4903      if (st->tag != Ist_CAS)
4904         stmt('C', &mce, st);
4905   }
4906
4907   /* Now we need to complain if the jump target is undefined. */
4908   first_stmt = sb_out->stmts_used;
4909
4910   if (verboze) {
4911      VG_(printf)("sb_in->next = ");
4912      ppIRExpr(sb_in->next);
4913      VG_(printf)("\n\n");
4914   }
4915
4916   complainIfUndefined( &mce, sb_in->next );
4917
4918   if (0 && verboze) {
4919      for (j = first_stmt; j < sb_out->stmts_used; j++) {
4920         VG_(printf)("   ");
4921         ppIRStmt(sb_out->stmts[j]);
4922         VG_(printf)("\n");
4923      }
4924      VG_(printf)("\n");
4925   }
4926
4927   /* If this fails, there's been some serious snafu with tmp management,
4928      that should be investigated. */
4929   tl_assert( VG_(sizeXA)( mce.tmpMap ) == mce.sb->tyenv->types_used );
4930   VG_(deleteXA)( mce.tmpMap );
4931
4932   tl_assert(mce.sb == sb_out);
4933   return sb_out;
4934}
4935
4936/*------------------------------------------------------------*/
4937/*--- Post-tree-build final tidying                        ---*/
4938/*------------------------------------------------------------*/
4939
4940/* This exploits the observation that Memcheck often produces
4941   repeated conditional calls of the form
4942
4943   Dirty G MC_(helperc_value_check0/1/4/8_fail)(UInt otag)
4944
4945   with the same guard expression G guarding the same helper call.
4946   The second and subsequent calls are redundant.  This usually
4947   results from instrumentation of guest code containing multiple
4948   memory references at different constant offsets from the same base
4949   register.  After optimisation of the instrumentation, you get a
4950   test for the definedness of the base register for each memory
4951   reference, which is kinda pointless.  MC_(final_tidy) therefore
4952   looks for such repeated calls and removes all but the first. */
4953
4954/* A struct for recording which (helper, guard) pairs we have already
4955   seen. */
4956typedef
4957   struct { void* entry; IRExpr* guard; }
4958   Pair;
4959
4960/* Return True if e1 and e2 definitely denote the same value (used to
4961   compare guards).  Return False if unknown; False is the safe
4962   answer.  Since guest registers and guest memory do not have the
4963   SSA property we must return False if any Gets or Loads appear in
4964   the expression. */
4965
4966static Bool sameIRValue ( IRExpr* e1, IRExpr* e2 )
4967{
4968   if (e1->tag != e2->tag)
4969      return False;
4970   switch (e1->tag) {
4971      case Iex_Const:
4972         return eqIRConst( e1->Iex.Const.con, e2->Iex.Const.con );
4973      case Iex_Binop:
4974         return e1->Iex.Binop.op == e2->Iex.Binop.op
4975                && sameIRValue(e1->Iex.Binop.arg1, e2->Iex.Binop.arg1)
4976                && sameIRValue(e1->Iex.Binop.arg2, e2->Iex.Binop.arg2);
4977      case Iex_Unop:
4978         return e1->Iex.Unop.op == e2->Iex.Unop.op
4979                && sameIRValue(e1->Iex.Unop.arg, e2->Iex.Unop.arg);
4980      case Iex_RdTmp:
4981         return e1->Iex.RdTmp.tmp == e2->Iex.RdTmp.tmp;
4982      case Iex_Mux0X:
4983         return sameIRValue( e1->Iex.Mux0X.cond, e2->Iex.Mux0X.cond )
4984                && sameIRValue( e1->Iex.Mux0X.expr0, e2->Iex.Mux0X.expr0 )
4985                && sameIRValue( e1->Iex.Mux0X.exprX, e2->Iex.Mux0X.exprX );
4986      case Iex_Qop:
4987      case Iex_Triop:
4988      case Iex_CCall:
4989         /* be lazy.  Could define equality for these, but they never
4990            appear to be used. */
4991         return False;
4992      case Iex_Get:
4993      case Iex_GetI:
4994      case Iex_Load:
4995         /* be conservative - these may not give the same value each
4996            time */
4997         return False;
4998      case Iex_Binder:
4999         /* should never see this */
5000         /* fallthrough */
5001      default:
5002         VG_(printf)("mc_translate.c: sameIRValue: unhandled: ");
5003         ppIRExpr(e1);
5004         VG_(tool_panic)("memcheck:sameIRValue");
5005         return False;
5006   }
5007}
5008
5009/* See if 'pairs' already has an entry for (entry, guard).  Return
5010   True if so.  If not, add an entry. */
5011
5012static
5013Bool check_or_add ( XArray* /*of Pair*/ pairs, IRExpr* guard, void* entry )
5014{
5015   Pair  p;
5016   Pair* pp;
5017   Int   i, n = VG_(sizeXA)( pairs );
5018   for (i = 0; i < n; i++) {
5019      pp = VG_(indexXA)( pairs, i );
5020      if (pp->entry == entry && sameIRValue(pp->guard, guard))
5021         return True;
5022   }
5023   p.guard = guard;
5024   p.entry = entry;
5025   VG_(addToXA)( pairs, &p );
5026   return False;
5027}
5028
5029static Bool is_helperc_value_checkN_fail ( HChar* name )
5030{
5031   return
5032      0==VG_(strcmp)(name, "MC_(helperc_value_check0_fail_no_o)")
5033      || 0==VG_(strcmp)(name, "MC_(helperc_value_check1_fail_no_o)")
5034      || 0==VG_(strcmp)(name, "MC_(helperc_value_check4_fail_no_o)")
5035      || 0==VG_(strcmp)(name, "MC_(helperc_value_check8_fail_no_o)")
5036      || 0==VG_(strcmp)(name, "MC_(helperc_value_check0_fail_w_o)")
5037      || 0==VG_(strcmp)(name, "MC_(helperc_value_check1_fail_w_o)")
5038      || 0==VG_(strcmp)(name, "MC_(helperc_value_check4_fail_w_o)")
5039      || 0==VG_(strcmp)(name, "MC_(helperc_value_check8_fail_w_o)");
5040}
5041
5042IRSB* MC_(final_tidy) ( IRSB* sb_in )
5043{
5044   Int i;
5045   IRStmt*   st;
5046   IRDirty*  di;
5047   IRExpr*   guard;
5048   IRCallee* cee;
5049   Bool      alreadyPresent;
5050   XArray*   pairs = VG_(newXA)( VG_(malloc), "mc.ft.1",
5051                                 VG_(free), sizeof(Pair) );
5052   /* Scan forwards through the statements.  Each time a call to one
5053      of the relevant helpers is seen, check if we have made a
5054      previous call to the same helper using the same guard
5055      expression, and if so, delete the call. */
5056   for (i = 0; i < sb_in->stmts_used; i++) {
5057      st = sb_in->stmts[i];
5058      tl_assert(st);
5059      if (st->tag != Ist_Dirty)
5060         continue;
5061      di = st->Ist.Dirty.details;
5062      guard = di->guard;
5063      if (!guard)
5064         continue;
5065      if (0) { ppIRExpr(guard); VG_(printf)("\n"); }
5066      cee = di->cee;
5067      if (!is_helperc_value_checkN_fail( cee->name ))
5068         continue;
5069       /* Ok, we have a call to helperc_value_check0/1/4/8_fail with
5070          guard 'guard'.  Check if we have already seen a call to this
5071          function with the same guard.  If so, delete it.  If not,
5072          add it to the set of calls we do know about. */
5073      alreadyPresent = check_or_add( pairs, guard, cee->addr );
5074      if (alreadyPresent) {
5075         sb_in->stmts[i] = IRStmt_NoOp();
5076         if (0) VG_(printf)("XX\n");
5077      }
5078   }
5079   VG_(deleteXA)( pairs );
5080   return sb_in;
5081}
5082
5083
5084/*------------------------------------------------------------*/
5085/*--- Origin tracking stuff                                ---*/
5086/*------------------------------------------------------------*/
5087
5088/* Almost identical to findShadowTmpV. */
5089static IRTemp findShadowTmpB ( MCEnv* mce, IRTemp orig )
5090{
5091   TempMapEnt* ent;
5092   /* VG_(indexXA) range-checks 'orig', hence no need to check
5093      here. */
5094   ent = (TempMapEnt*)VG_(indexXA)( mce->tmpMap, (Word)orig );
5095   tl_assert(ent->kind == Orig);
5096   if (ent->shadowB == IRTemp_INVALID) {
5097      IRTemp tmpB
5098        = newTemp( mce, Ity_I32, BSh );
5099      /* newTemp may cause mce->tmpMap to resize, hence previous results
5100         from VG_(indexXA) are invalid. */
5101      ent = (TempMapEnt*)VG_(indexXA)( mce->tmpMap, (Word)orig );
5102      tl_assert(ent->kind == Orig);
5103      tl_assert(ent->shadowB == IRTemp_INVALID);
5104      ent->shadowB = tmpB;
5105   }
5106   return ent->shadowB;
5107}
5108
5109static IRAtom* gen_maxU32 ( MCEnv* mce, IRAtom* b1, IRAtom* b2 )
5110{
5111   return assignNew( 'B', mce, Ity_I32, binop(Iop_Max32U, b1, b2) );
5112}
5113
5114static IRAtom* gen_load_b ( MCEnv* mce, Int szB,
5115                            IRAtom* baseaddr, Int offset )
5116{
5117   void*    hFun;
5118   HChar*   hName;
5119   IRTemp   bTmp;
5120   IRDirty* di;
5121   IRType   aTy   = typeOfIRExpr( mce->sb->tyenv, baseaddr );
5122   IROp     opAdd = aTy == Ity_I32 ? Iop_Add32 : Iop_Add64;
5123   IRAtom*  ea    = baseaddr;
5124   if (offset != 0) {
5125      IRAtom* off = aTy == Ity_I32 ? mkU32( offset )
5126                                   : mkU64( (Long)(Int)offset );
5127      ea = assignNew( 'B', mce, aTy, binop(opAdd, ea, off));
5128   }
5129   bTmp = newTemp(mce, mce->hWordTy, BSh);
5130
5131   switch (szB) {
5132      case 1: hFun  = (void*)&MC_(helperc_b_load1);
5133              hName = "MC_(helperc_b_load1)";
5134              break;
5135      case 2: hFun  = (void*)&MC_(helperc_b_load2);
5136              hName = "MC_(helperc_b_load2)";
5137              break;
5138      case 4: hFun  = (void*)&MC_(helperc_b_load4);
5139              hName = "MC_(helperc_b_load4)";
5140              break;
5141      case 8: hFun  = (void*)&MC_(helperc_b_load8);
5142              hName = "MC_(helperc_b_load8)";
5143              break;
5144      case 16: hFun  = (void*)&MC_(helperc_b_load16);
5145               hName = "MC_(helperc_b_load16)";
5146               break;
5147      default:
5148         VG_(printf)("mc_translate.c: gen_load_b: unhandled szB == %d\n", szB);
5149         tl_assert(0);
5150   }
5151   di = unsafeIRDirty_1_N(
5152           bTmp, 1/*regparms*/, hName, VG_(fnptr_to_fnentry)( hFun ),
5153           mkIRExprVec_1( ea )
5154        );
5155   /* no need to mess with any annotations.  This call accesses
5156      neither guest state nor guest memory. */
5157   stmt( 'B', mce, IRStmt_Dirty(di) );
5158   if (mce->hWordTy == Ity_I64) {
5159      /* 64-bit host */
5160      IRTemp bTmp32 = newTemp(mce, Ity_I32, BSh);
5161      assign( 'B', mce, bTmp32, unop(Iop_64to32, mkexpr(bTmp)) );
5162      return mkexpr(bTmp32);
5163   } else {
5164      /* 32-bit host */
5165      return mkexpr(bTmp);
5166   }
5167}
5168
5169/* Generate a shadow store.  guard :: Ity_I1 controls whether the
5170   store really happens; NULL means it unconditionally does. */
5171static void gen_store_b ( MCEnv* mce, Int szB,
5172                          IRAtom* baseaddr, Int offset, IRAtom* dataB,
5173                          IRAtom* guard )
5174{
5175   void*    hFun;
5176   HChar*   hName;
5177   IRDirty* di;
5178   IRType   aTy   = typeOfIRExpr( mce->sb->tyenv, baseaddr );
5179   IROp     opAdd = aTy == Ity_I32 ? Iop_Add32 : Iop_Add64;
5180   IRAtom*  ea    = baseaddr;
5181   if (guard) {
5182      tl_assert(isOriginalAtom(mce, guard));
5183      tl_assert(typeOfIRExpr(mce->sb->tyenv, guard) == Ity_I1);
5184   }
5185   if (offset != 0) {
5186      IRAtom* off = aTy == Ity_I32 ? mkU32( offset )
5187                                   : mkU64( (Long)(Int)offset );
5188      ea = assignNew(  'B', mce, aTy, binop(opAdd, ea, off));
5189   }
5190   if (mce->hWordTy == Ity_I64)
5191      dataB = assignNew( 'B', mce, Ity_I64, unop(Iop_32Uto64, dataB));
5192
5193   switch (szB) {
5194      case 1: hFun  = (void*)&MC_(helperc_b_store1);
5195              hName = "MC_(helperc_b_store1)";
5196              break;
5197      case 2: hFun  = (void*)&MC_(helperc_b_store2);
5198              hName = "MC_(helperc_b_store2)";
5199              break;
5200      case 4: hFun  = (void*)&MC_(helperc_b_store4);
5201              hName = "MC_(helperc_b_store4)";
5202              break;
5203      case 8: hFun  = (void*)&MC_(helperc_b_store8);
5204              hName = "MC_(helperc_b_store8)";
5205              break;
5206      case 16: hFun  = (void*)&MC_(helperc_b_store16);
5207               hName = "MC_(helperc_b_store16)";
5208               break;
5209      default:
5210         tl_assert(0);
5211   }
5212   di = unsafeIRDirty_0_N( 2/*regparms*/,
5213           hName, VG_(fnptr_to_fnentry)( hFun ),
5214           mkIRExprVec_2( ea, dataB )
5215        );
5216   /* no need to mess with any annotations.  This call accesses
5217      neither guest state nor guest memory. */
5218   if (guard) di->guard = guard;
5219   stmt( 'B', mce, IRStmt_Dirty(di) );
5220}
5221
5222static IRAtom* narrowTo32 ( MCEnv* mce, IRAtom* e ) {
5223   IRType eTy = typeOfIRExpr(mce->sb->tyenv, e);
5224   if (eTy == Ity_I64)
5225      return assignNew( 'B', mce, Ity_I32, unop(Iop_64to32, e) );
5226   if (eTy == Ity_I32)
5227      return e;
5228   tl_assert(0);
5229}
5230
5231static IRAtom* zWidenFrom32 ( MCEnv* mce, IRType dstTy, IRAtom* e ) {
5232   IRType eTy = typeOfIRExpr(mce->sb->tyenv, e);
5233   tl_assert(eTy == Ity_I32);
5234   if (dstTy == Ity_I64)
5235      return assignNew( 'B', mce, Ity_I64, unop(Iop_32Uto64, e) );
5236   tl_assert(0);
5237}
5238
5239
5240static IRAtom* schemeE ( MCEnv* mce, IRExpr* e )
5241{
5242   tl_assert(MC_(clo_mc_level) == 3);
5243
5244   switch (e->tag) {
5245
5246      case Iex_GetI: {
5247         IRRegArray* descr_b;
5248         IRAtom      *t1, *t2, *t3, *t4;
5249         IRRegArray* descr      = e->Iex.GetI.descr;
5250         IRType equivIntTy
5251            = MC_(get_otrack_reg_array_equiv_int_type)(descr);
5252         /* If this array is unshadowable for whatever reason, use the
5253            usual approximation. */
5254         if (equivIntTy == Ity_INVALID)
5255            return mkU32(0);
5256         tl_assert(sizeofIRType(equivIntTy) >= 4);
5257         tl_assert(sizeofIRType(equivIntTy) == sizeofIRType(descr->elemTy));
5258         descr_b = mkIRRegArray( descr->base + 2*mce->layout->total_sizeB,
5259                                 equivIntTy, descr->nElems );
5260         /* Do a shadow indexed get of the same size, giving t1.  Take
5261            the bottom 32 bits of it, giving t2.  Compute into t3 the
5262            origin for the index (almost certainly zero, but there's
5263            no harm in being completely general here, since iropt will
5264            remove any useless code), and fold it in, giving a final
5265            value t4. */
5266         t1 = assignNew( 'B', mce, equivIntTy,
5267                          IRExpr_GetI( descr_b, e->Iex.GetI.ix,
5268                                                e->Iex.GetI.bias ));
5269         t2 = narrowTo32( mce, t1 );
5270         t3 = schemeE( mce, e->Iex.GetI.ix );
5271         t4 = gen_maxU32( mce, t2, t3 );
5272         return t4;
5273      }
5274      case Iex_CCall: {
5275         Int i;
5276         IRAtom*  here;
5277         IRExpr** args = e->Iex.CCall.args;
5278         IRAtom*  curr = mkU32(0);
5279         for (i = 0; args[i]; i++) {
5280            tl_assert(i < 32);
5281            tl_assert(isOriginalAtom(mce, args[i]));
5282            /* Only take notice of this arg if the callee's
5283               mc-exclusion mask does not say it is to be excluded. */
5284            if (e->Iex.CCall.cee->mcx_mask & (1<<i)) {
5285               /* the arg is to be excluded from definedness checking.
5286                  Do nothing. */
5287               if (0) VG_(printf)("excluding %s(%d)\n",
5288                                  e->Iex.CCall.cee->name, i);
5289            } else {
5290               /* calculate the arg's definedness, and pessimistically
5291                  merge it in. */
5292               here = schemeE( mce, args[i] );
5293               curr = gen_maxU32( mce, curr, here );
5294            }
5295         }
5296         return curr;
5297      }
5298      case Iex_Load: {
5299         Int dszB;
5300         dszB = sizeofIRType(e->Iex.Load.ty);
5301         /* assert that the B value for the address is already
5302            available (somewhere) */
5303         tl_assert(isIRAtom(e->Iex.Load.addr));
5304         tl_assert(mce->hWordTy == Ity_I32 || mce->hWordTy == Ity_I64);
5305         return gen_load_b( mce, dszB, e->Iex.Load.addr, 0 );
5306      }
5307      case Iex_Mux0X: {
5308         IRAtom* b1 = schemeE( mce, e->Iex.Mux0X.cond );
5309         IRAtom* b2 = schemeE( mce, e->Iex.Mux0X.expr0 );
5310         IRAtom* b3 = schemeE( mce, e->Iex.Mux0X.exprX );
5311         return gen_maxU32( mce, b1, gen_maxU32( mce, b2, b3 ));
5312      }
5313      case Iex_Qop: {
5314         IRAtom* b1 = schemeE( mce, e->Iex.Qop.arg1 );
5315         IRAtom* b2 = schemeE( mce, e->Iex.Qop.arg2 );
5316         IRAtom* b3 = schemeE( mce, e->Iex.Qop.arg3 );
5317         IRAtom* b4 = schemeE( mce, e->Iex.Qop.arg4 );
5318         return gen_maxU32( mce, gen_maxU32( mce, b1, b2 ),
5319                                 gen_maxU32( mce, b3, b4 ) );
5320      }
5321      case Iex_Triop: {
5322         IRAtom* b1 = schemeE( mce, e->Iex.Triop.arg1 );
5323         IRAtom* b2 = schemeE( mce, e->Iex.Triop.arg2 );
5324         IRAtom* b3 = schemeE( mce, e->Iex.Triop.arg3 );
5325         return gen_maxU32( mce, b1, gen_maxU32( mce, b2, b3 ) );
5326      }
5327      case Iex_Binop: {
5328         switch (e->Iex.Binop.op) {
5329            case Iop_CasCmpEQ8:  case Iop_CasCmpNE8:
5330            case Iop_CasCmpEQ16: case Iop_CasCmpNE16:
5331            case Iop_CasCmpEQ32: case Iop_CasCmpNE32:
5332            case Iop_CasCmpEQ64: case Iop_CasCmpNE64:
5333               /* Just say these all produce a defined result,
5334                  regardless of their arguments.  See
5335                  COMMENT_ON_CasCmpEQ in this file. */
5336               return mkU32(0);
5337            default: {
5338               IRAtom* b1 = schemeE( mce, e->Iex.Binop.arg1 );
5339               IRAtom* b2 = schemeE( mce, e->Iex.Binop.arg2 );
5340               return gen_maxU32( mce, b1, b2 );
5341            }
5342         }
5343         tl_assert(0);
5344         /*NOTREACHED*/
5345      }
5346      case Iex_Unop: {
5347         IRAtom* b1 = schemeE( mce, e->Iex.Unop.arg );
5348         return b1;
5349      }
5350      case Iex_Const:
5351         return mkU32(0);
5352      case Iex_RdTmp:
5353         return mkexpr( findShadowTmpB( mce, e->Iex.RdTmp.tmp ));
5354      case Iex_Get: {
5355         Int b_offset = MC_(get_otrack_shadow_offset)(
5356                           e->Iex.Get.offset,
5357                           sizeofIRType(e->Iex.Get.ty)
5358                        );
5359         tl_assert(b_offset >= -1
5360                   && b_offset <= mce->layout->total_sizeB -4);
5361         if (b_offset >= 0) {
5362            /* FIXME: this isn't an atom! */
5363            return IRExpr_Get( b_offset + 2*mce->layout->total_sizeB,
5364                               Ity_I32 );
5365         }
5366         return mkU32(0);
5367      }
5368      default:
5369         VG_(printf)("mc_translate.c: schemeE: unhandled: ");
5370         ppIRExpr(e);
5371         VG_(tool_panic)("memcheck:schemeE");
5372   }
5373}
5374
5375
5376static void do_origins_Dirty ( MCEnv* mce, IRDirty* d )
5377{
5378   // This is a hacked version of do_shadow_Dirty
5379   Int       i, n, toDo, gSz, gOff;
5380   IRAtom    *here, *curr;
5381   IRTemp    dst;
5382
5383   /* First check the guard. */
5384   curr = schemeE( mce, d->guard );
5385
5386   /* Now round up all inputs and maxU32 over them. */
5387
5388   /* Inputs: unmasked args */
5389   for (i = 0; d->args[i]; i++) {
5390      if (d->cee->mcx_mask & (1<<i)) {
5391         /* ignore this arg */
5392      } else {
5393         here = schemeE( mce, d->args[i] );
5394         curr = gen_maxU32( mce, curr, here );
5395      }
5396   }
5397
5398   /* Inputs: guest state that we read. */
5399   for (i = 0; i < d->nFxState; i++) {
5400      tl_assert(d->fxState[i].fx != Ifx_None);
5401      if (d->fxState[i].fx == Ifx_Write)
5402         continue;
5403
5404      /* Ignore any sections marked as 'always defined'. */
5405      if (isAlwaysDefd(mce, d->fxState[i].offset, d->fxState[i].size )) {
5406         if (0)
5407         VG_(printf)("memcheck: Dirty gst: ignored off %d, sz %d\n",
5408                     d->fxState[i].offset, d->fxState[i].size );
5409         continue;
5410      }
5411
5412      /* This state element is read or modified.  So we need to
5413         consider it.  If larger than 4 bytes, deal with it in 4-byte
5414         chunks. */
5415      gSz  = d->fxState[i].size;
5416      gOff = d->fxState[i].offset;
5417      tl_assert(gSz > 0);
5418      while (True) {
5419         Int b_offset;
5420         if (gSz == 0) break;
5421         n = gSz <= 4 ? gSz : 4;
5422         /* update 'curr' with maxU32 of the state slice
5423            gOff .. gOff+n-1 */
5424         b_offset = MC_(get_otrack_shadow_offset)(gOff, 4);
5425         if (b_offset != -1) {
5426            here = assignNew( 'B',mce,
5427                               Ity_I32,
5428                               IRExpr_Get(b_offset + 2*mce->layout->total_sizeB,
5429                                          Ity_I32));
5430            curr = gen_maxU32( mce, curr, here );
5431         }
5432         gSz -= n;
5433         gOff += n;
5434      }
5435
5436   }
5437
5438   /* Inputs: memory */
5439
5440   if (d->mFx != Ifx_None) {
5441      /* Because we may do multiple shadow loads/stores from the same
5442         base address, it's best to do a single test of its
5443         definedness right now.  Post-instrumentation optimisation
5444         should remove all but this test. */
5445      tl_assert(d->mAddr);
5446      here = schemeE( mce, d->mAddr );
5447      curr = gen_maxU32( mce, curr, here );
5448   }
5449
5450   /* Deal with memory inputs (reads or modifies) */
5451   if (d->mFx == Ifx_Read || d->mFx == Ifx_Modify) {
5452      toDo   = d->mSize;
5453      /* chew off 32-bit chunks.  We don't care about the endianness
5454         since it's all going to be condensed down to a single bit,
5455         but nevertheless choose an endianness which is hopefully
5456         native to the platform. */
5457      while (toDo >= 4) {
5458         here = gen_load_b( mce, 4, d->mAddr, d->mSize - toDo );
5459         curr = gen_maxU32( mce, curr, here );
5460         toDo -= 4;
5461      }
5462      /* handle possible 16-bit excess */
5463      while (toDo >= 2) {
5464         here = gen_load_b( mce, 2, d->mAddr, d->mSize - toDo );
5465         curr = gen_maxU32( mce, curr, here );
5466         toDo -= 2;
5467      }
5468      tl_assert(toDo == 0); /* also need to handle 1-byte excess */
5469   }
5470
5471   /* Whew!  So curr is a 32-bit B-value which should give an origin
5472      of some use if any of the inputs to the helper are undefined.
5473      Now we need to re-distribute the results to all destinations. */
5474
5475   /* Outputs: the destination temporary, if there is one. */
5476   if (d->tmp != IRTemp_INVALID) {
5477      dst   = findShadowTmpB(mce, d->tmp);
5478      assign( 'V', mce, dst, curr );
5479   }
5480
5481   /* Outputs: guest state that we write or modify. */
5482   for (i = 0; i < d->nFxState; i++) {
5483      tl_assert(d->fxState[i].fx != Ifx_None);
5484      if (d->fxState[i].fx == Ifx_Read)
5485         continue;
5486
5487      /* Ignore any sections marked as 'always defined'. */
5488      if (isAlwaysDefd(mce, d->fxState[i].offset, d->fxState[i].size ))
5489         continue;
5490
5491      /* This state element is written or modified.  So we need to
5492         consider it.  If larger than 4 bytes, deal with it in 4-byte
5493         chunks. */
5494      gSz  = d->fxState[i].size;
5495      gOff = d->fxState[i].offset;
5496      tl_assert(gSz > 0);
5497      while (True) {
5498         Int b_offset;
5499         if (gSz == 0) break;
5500         n = gSz <= 4 ? gSz : 4;
5501         /* Write 'curr' to the state slice gOff .. gOff+n-1 */
5502         b_offset = MC_(get_otrack_shadow_offset)(gOff, 4);
5503         if (b_offset != -1) {
5504           stmt( 'B', mce, IRStmt_Put(b_offset + 2*mce->layout->total_sizeB,
5505                                      curr ));
5506         }
5507         gSz -= n;
5508         gOff += n;
5509      }
5510   }
5511
5512   /* Outputs: memory that we write or modify.  Same comments about
5513      endianness as above apply. */
5514   if (d->mFx == Ifx_Write || d->mFx == Ifx_Modify) {
5515      toDo   = d->mSize;
5516      /* chew off 32-bit chunks */
5517      while (toDo >= 4) {
5518         gen_store_b( mce, 4, d->mAddr, d->mSize - toDo, curr,
5519                      NULL/*guard*/ );
5520         toDo -= 4;
5521      }
5522      /* handle possible 16-bit excess */
5523      while (toDo >= 2) {
5524        gen_store_b( mce, 2, d->mAddr, d->mSize - toDo, curr,
5525                     NULL/*guard*/ );
5526         toDo -= 2;
5527      }
5528      tl_assert(toDo == 0); /* also need to handle 1-byte excess */
5529   }
5530}
5531
5532
5533static void do_origins_Store ( MCEnv* mce,
5534                               IREndness stEnd,
5535                               IRExpr* stAddr,
5536                               IRExpr* stData )
5537{
5538   Int     dszB;
5539   IRAtom* dataB;
5540   /* assert that the B value for the address is already available
5541      (somewhere), since the call to schemeE will want to see it.
5542      XXXX how does this actually ensure that?? */
5543   tl_assert(isIRAtom(stAddr));
5544   tl_assert(isIRAtom(stData));
5545   dszB  = sizeofIRType( typeOfIRExpr(mce->sb->tyenv, stData ) );
5546   dataB = schemeE( mce, stData );
5547   gen_store_b( mce, dszB, stAddr, 0/*offset*/, dataB,
5548                     NULL/*guard*/ );
5549}
5550
5551
5552static void schemeS ( MCEnv* mce, IRStmt* st )
5553{
5554   tl_assert(MC_(clo_mc_level) == 3);
5555
5556   switch (st->tag) {
5557
5558      case Ist_AbiHint:
5559         /* The value-check instrumenter handles this - by arranging
5560            to pass the address of the next instruction to
5561            MC_(helperc_MAKE_STACK_UNINIT).  This is all that needs to
5562            happen for origin tracking w.r.t. AbiHints.  So there is
5563            nothing to do here. */
5564         break;
5565
5566      case Ist_PutI: {
5567         IRRegArray* descr_b;
5568         IRAtom      *t1, *t2, *t3, *t4;
5569         IRRegArray* descr = st->Ist.PutI.descr;
5570         IRType equivIntTy
5571            = MC_(get_otrack_reg_array_equiv_int_type)(descr);
5572         /* If this array is unshadowable for whatever reason,
5573            generate no code. */
5574         if (equivIntTy == Ity_INVALID)
5575            break;
5576         tl_assert(sizeofIRType(equivIntTy) >= 4);
5577         tl_assert(sizeofIRType(equivIntTy) == sizeofIRType(descr->elemTy));
5578         descr_b
5579            = mkIRRegArray( descr->base + 2*mce->layout->total_sizeB,
5580                            equivIntTy, descr->nElems );
5581         /* Compute a value to Put - the conjoinment of the origin for
5582            the data to be Put-ted (obviously) and of the index value
5583            (not so obviously). */
5584         t1 = schemeE( mce, st->Ist.PutI.data );
5585         t2 = schemeE( mce, st->Ist.PutI.ix );
5586         t3 = gen_maxU32( mce, t1, t2 );
5587         t4 = zWidenFrom32( mce, equivIntTy, t3 );
5588         stmt( 'B', mce, IRStmt_PutI( descr_b, st->Ist.PutI.ix,
5589                                      st->Ist.PutI.bias, t4 ));
5590         break;
5591      }
5592
5593      case Ist_Dirty:
5594         do_origins_Dirty( mce, st->Ist.Dirty.details );
5595         break;
5596
5597      case Ist_Store:
5598         do_origins_Store( mce, st->Ist.Store.end,
5599                                st->Ist.Store.addr,
5600                                st->Ist.Store.data );
5601         break;
5602
5603      case Ist_LLSC: {
5604         /* In short: treat a load-linked like a normal load followed
5605            by an assignment of the loaded (shadow) data the result
5606            temporary.  Treat a store-conditional like a normal store,
5607            and mark the result temporary as defined. */
5608         if (st->Ist.LLSC.storedata == NULL) {
5609            /* Load Linked */
5610            IRType resTy
5611               = typeOfIRTemp(mce->sb->tyenv, st->Ist.LLSC.result);
5612            IRExpr* vanillaLoad
5613               = IRExpr_Load(st->Ist.LLSC.end, resTy, st->Ist.LLSC.addr);
5614            tl_assert(resTy == Ity_I64 || resTy == Ity_I32
5615                      || resTy == Ity_I16 || resTy == Ity_I8);
5616            assign( 'B', mce, findShadowTmpB(mce, st->Ist.LLSC.result),
5617                              schemeE(mce, vanillaLoad));
5618         } else {
5619            /* Store conditional */
5620            do_origins_Store( mce, st->Ist.LLSC.end,
5621                                   st->Ist.LLSC.addr,
5622                                   st->Ist.LLSC.storedata );
5623            /* For the rationale behind this, see comments at the
5624               place where the V-shadow for .result is constructed, in
5625               do_shadow_LLSC.  In short, we regard .result as
5626               always-defined. */
5627            assign( 'B', mce, findShadowTmpB(mce, st->Ist.LLSC.result),
5628                              mkU32(0) );
5629         }
5630         break;
5631      }
5632
5633      case Ist_Put: {
5634         Int b_offset
5635            = MC_(get_otrack_shadow_offset)(
5636                 st->Ist.Put.offset,
5637                 sizeofIRType(typeOfIRExpr(mce->sb->tyenv, st->Ist.Put.data))
5638              );
5639         if (b_offset >= 0) {
5640            /* FIXME: this isn't an atom! */
5641            stmt( 'B', mce, IRStmt_Put(b_offset + 2*mce->layout->total_sizeB,
5642                                       schemeE( mce, st->Ist.Put.data )) );
5643         }
5644         break;
5645      }
5646
5647      case Ist_WrTmp:
5648         assign( 'B', mce, findShadowTmpB(mce, st->Ist.WrTmp.tmp),
5649                           schemeE(mce, st->Ist.WrTmp.data) );
5650         break;
5651
5652      case Ist_MBE:
5653      case Ist_NoOp:
5654      case Ist_Exit:
5655      case Ist_IMark:
5656         break;
5657
5658      default:
5659         VG_(printf)("mc_translate.c: schemeS: unhandled: ");
5660         ppIRStmt(st);
5661         VG_(tool_panic)("memcheck:schemeS");
5662   }
5663}
5664
5665
5666/*--------------------------------------------------------------------*/
5667/*--- end                                           mc_translate.c ---*/
5668/*--------------------------------------------------------------------*/
5669