mc_translate.c revision 36a20fa5f779a0a6fb7b4a90dcaa6376481f1faa
1
2/*--------------------------------------------------------------------*/
3/*--- Instrument IR to perform memory checking operations.         ---*/
4/*---                                               mc_translate.c ---*/
5/*--------------------------------------------------------------------*/
6
7/*
8   This file is part of MemCheck, a heavyweight Valgrind tool for
9   detecting memory errors.
10
11   Copyright (C) 2000-2005 Julian Seward
12      jseward@acm.org
13
14   This program is free software; you can redistribute it and/or
15   modify it under the terms of the GNU General Public License as
16   published by the Free Software Foundation; either version 2 of the
17   License, or (at your option) any later version.
18
19   This program is distributed in the hope that it will be useful, but
20   WITHOUT ANY WARRANTY; without even the implied warranty of
21   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
22   General Public License for more details.
23
24   You should have received a copy of the GNU General Public License
25   along with this program; if not, write to the Free Software
26   Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA
27   02111-1307, USA.
28
29   The GNU General Public License is contained in the file COPYING.
30*/
31
32#include "mc_include.h"
33#include "pub_tool_libcprint.h"
34
35
36/*------------------------------------------------------------*/
37/*--- Forward decls                                        ---*/
38/*------------------------------------------------------------*/
39
40struct _MCEnv;
41
42static IRType  shadowType ( IRType ty );
43static IRExpr* expr2vbits ( struct _MCEnv* mce, IRExpr* e );
44
45
46/*------------------------------------------------------------*/
47/*--- Memcheck running state, and tmp management.          ---*/
48/*------------------------------------------------------------*/
49
50/* Carries around state during memcheck instrumentation. */
51typedef
52   struct _MCEnv {
53      /* MODIFIED: the bb being constructed.  IRStmts are added. */
54      IRBB* bb;
55
56      /* MODIFIED: a table [0 .. #temps_in_original_bb-1] which maps
57         original temps to their current shadow temp.  Initially all
58         entries are IRTemp_INVALID.  Entries are added lazily since
59         many original temps are not used due to optimisation prior
60         to instrumentation.  Note that floating point original tmps
61         are shadowed by integer tmps of the same size, and that
62         Ity_I1 (bit) original tmps keep the type Ity_I1; see
63         shadowType below. */
64      IRTemp* tmpMap;
65      Int     n_originalTmps; /* for range checking */
66
67      /* MODIFIED: indicates whether "bogus" literals have so far been
68         found.  Starts off False, and may change to True. */
69      Bool    bogusLiterals;
70
71      /* READONLY: the guest layout.  This indicates which parts of
72         the guest state should be regarded as 'always defined'. */
73      VexGuestLayout* layout;
74      /* READONLY: the host word type.  Needed for constructing
75         arguments of type 'HWord' to be passed to helper functions.
76         Ity_I32 or Ity_I64 only. */
77      IRType hWordTy;
78   }
79   MCEnv;
80
81/* SHADOW TMP MANAGEMENT.  Shadow tmps are allocated lazily (on
82   demand), as they are encountered.  This is for two reasons.
83
84   (1) (less important reason): Many original tmps are unused due to
85   initial IR optimisation, and we do not want to waste space in tables
86   tracking them.
87
88   Shadow IRTemps are therefore allocated on demand.  mce.tmpMap is a
89   table indexed [0 .. n_originalTmps-1], which gives the current shadow
90   for each original tmp, or IRTemp_INVALID if none is so far assigned.
91   It is necessary to support making multiple assignments to a shadow
92   -- specifically, after testing a shadow for definedness, it needs
93   to be made defined.  But IR's SSA property disallows this.
94
95   (2) (more important reason): Therefore, when a shadow needs to get
96   a new value, a new temporary is created, the value is assigned to
97   that, and the tmpMap is updated to reflect the new binding.
98
99   A corollary is that if the tmpMap maps a given tmp to
100   IRTemp_INVALID and we are hoping to read that shadow tmp, it means
101   there's a read-before-write error in the original tmps.  The IR
102   sanity checker should catch all such anomalies, however.
103*/
104
105/* Find the tmp currently shadowing the given original tmp.  If none
106   so far exists, allocate one.  */
107static IRTemp findShadowTmp ( MCEnv* mce, IRTemp orig )
108{
109   tl_assert(orig < mce->n_originalTmps);
110   if (mce->tmpMap[orig] == IRTemp_INVALID) {
111      mce->tmpMap[orig]
112         = newIRTemp(mce->bb->tyenv,
113                     shadowType(mce->bb->tyenv->types[orig]));
114   }
115   return mce->tmpMap[orig];
116}
117
118/* Allocate a new shadow for the given original tmp.  This means any
119   previous shadow is abandoned.  This is needed because it is
120   necessary to give a new value to a shadow once it has been tested
121   for undefinedness, but unfortunately IR's SSA property disallows
122   this.  Instead we must abandon the old shadow, allocate a new one
123   and use that instead. */
124static void newShadowTmp ( MCEnv* mce, IRTemp orig )
125{
126   tl_assert(orig < mce->n_originalTmps);
127   mce->tmpMap[orig]
128      = newIRTemp(mce->bb->tyenv,
129                  shadowType(mce->bb->tyenv->types[orig]));
130}
131
132
133/*------------------------------------------------------------*/
134/*--- IRAtoms -- a subset of IRExprs                       ---*/
135/*------------------------------------------------------------*/
136
137/* An atom is either an IRExpr_Const or an IRExpr_Tmp, as defined by
138   isIRAtom() in libvex_ir.h.  Because this instrumenter expects flat
139   input, most of this code deals in atoms.  Usefully, a value atom
140   always has a V-value which is also an atom: constants are shadowed
141   by constants, and temps are shadowed by the corresponding shadow
142   temporary. */
143
144typedef  IRExpr  IRAtom;
145
146/* (used for sanity checks only): is this an atom which looks
147   like it's from original code? */
148static Bool isOriginalAtom ( MCEnv* mce, IRAtom* a1 )
149{
150   if (a1->tag == Iex_Const)
151      return True;
152   if (a1->tag == Iex_Tmp && a1->Iex.Tmp.tmp < mce->n_originalTmps)
153      return True;
154   return False;
155}
156
157/* (used for sanity checks only): is this an atom which looks
158   like it's from shadow code? */
159static Bool isShadowAtom ( MCEnv* mce, IRAtom* a1 )
160{
161   if (a1->tag == Iex_Const)
162      return True;
163   if (a1->tag == Iex_Tmp && a1->Iex.Tmp.tmp >= mce->n_originalTmps)
164      return True;
165   return False;
166}
167
168/* (used for sanity checks only): check that both args are atoms and
169   are identically-kinded. */
170static Bool sameKindedAtoms ( IRAtom* a1, IRAtom* a2 )
171{
172   if (a1->tag == Iex_Tmp && a2->tag == Iex_Tmp)
173      return True;
174   if (a1->tag == Iex_Const && a2->tag == Iex_Const)
175      return True;
176   return False;
177}
178
179
180/*------------------------------------------------------------*/
181/*--- Type management                                      ---*/
182/*------------------------------------------------------------*/
183
184/* Shadow state is always accessed using integer types.  This returns
185   an integer type with the same size (as per sizeofIRType) as the
186   given type.  The only valid shadow types are I1, I8, I16, I32,
187   I64, I128, V128. */
188
189static IRType shadowType ( IRType ty )
190{
191   switch (ty) {
192      case Ity_I1:
193      case Ity_I8:
194      case Ity_I16:
195      case Ity_I32:
196      case Ity_I64:
197      case Ity_I128: return ty;
198      case Ity_F32:  return Ity_I32;
199      case Ity_F64:  return Ity_I64;
200      case Ity_V128: return Ity_V128;
201      default: ppIRType(ty);
202               VG_(tool_panic)("memcheck:shadowType");
203   }
204}
205
206/* Produce a 'defined' value of the given shadow type.  Should only be
207   supplied shadow types (I1/I8/I16/I32/I64/V128). */
208static IRExpr* definedOfType ( IRType ty ) {
209   switch (ty) {
210      case Ity_I1:   return IRExpr_Const(IRConst_U1(False));
211      case Ity_I8:   return IRExpr_Const(IRConst_U8(0));
212      case Ity_I16:  return IRExpr_Const(IRConst_U16(0));
213      case Ity_I32:  return IRExpr_Const(IRConst_U32(0));
214      case Ity_I64:  return IRExpr_Const(IRConst_U64(0));
215      case Ity_V128: return IRExpr_Const(IRConst_V128(0x0000));
216      default:      VG_(tool_panic)("memcheck:definedOfType");
217   }
218}
219
220
221/*------------------------------------------------------------*/
222/*--- Constructing IR fragments                            ---*/
223/*------------------------------------------------------------*/
224
225/* assign value to tmp */
226#define assign(_bb,_tmp,_expr)   \
227   addStmtToIRBB((_bb), IRStmt_Tmp((_tmp),(_expr)))
228
229/* add stmt to a bb */
230#define stmt(_bb,_stmt)    \
231   addStmtToIRBB((_bb), (_stmt))
232
233/* build various kinds of expressions */
234#define binop(_op, _arg1, _arg2) IRExpr_Binop((_op),(_arg1),(_arg2))
235#define unop(_op, _arg)          IRExpr_Unop((_op),(_arg))
236#define mkU8(_n)                 IRExpr_Const(IRConst_U8(_n))
237#define mkU16(_n)                IRExpr_Const(IRConst_U16(_n))
238#define mkU32(_n)                IRExpr_Const(IRConst_U32(_n))
239#define mkU64(_n)                IRExpr_Const(IRConst_U64(_n))
240#define mkV128(_n)               IRExpr_Const(IRConst_V128(_n))
241#define mkexpr(_tmp)             IRExpr_Tmp((_tmp))
242
243/* bind the given expression to a new temporary, and return the
244   temporary.  This effectively converts an arbitrary expression into
245   an atom. */
246static IRAtom* assignNew ( MCEnv* mce, IRType ty, IRExpr* e ) {
247   IRTemp t = newIRTemp(mce->bb->tyenv, ty);
248   assign(mce->bb, t, e);
249   return mkexpr(t);
250}
251
252
253/*------------------------------------------------------------*/
254/*--- Constructing definedness primitive ops               ---*/
255/*------------------------------------------------------------*/
256
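/* For illustration: throughout this file a shadow (V) bit of 0 means
   "defined" and 1 means "undefined" -- see definedOfType above, which
   produces all-zero constants, and the ImproveAND/ImproveOR comments
   below.  Under that convention, defined-if-either-defined is simply
   bitwise AND of the two shadow values, and undefined-if-either-
   undefined is bitwise OR.  E.g. (8-bit case, values are vbits):

      DifD8(0x0F, 0xF0) = 0x00  -- every bit is defined in one arg or the other
      UifU8(0x0F, 0x01) = 0x0F  -- a bit undefined in either arg stays undefined
*/
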
257/* --------- Defined-if-either-defined --------- */
258
259static IRAtom* mkDifD8 ( MCEnv* mce, IRAtom* a1, IRAtom* a2 ) {
260   tl_assert(isShadowAtom(mce,a1));
261   tl_assert(isShadowAtom(mce,a2));
262   return assignNew(mce, Ity_I8, binop(Iop_And8, a1, a2));
263}
264
265static IRAtom* mkDifD16 ( MCEnv* mce, IRAtom* a1, IRAtom* a2 ) {
266   tl_assert(isShadowAtom(mce,a1));
267   tl_assert(isShadowAtom(mce,a2));
268   return assignNew(mce, Ity_I16, binop(Iop_And16, a1, a2));
269}
270
271static IRAtom* mkDifD32 ( MCEnv* mce, IRAtom* a1, IRAtom* a2 ) {
272   tl_assert(isShadowAtom(mce,a1));
273   tl_assert(isShadowAtom(mce,a2));
274   return assignNew(mce, Ity_I32, binop(Iop_And32, a1, a2));
275}
276
277static IRAtom* mkDifD64 ( MCEnv* mce, IRAtom* a1, IRAtom* a2 ) {
278   tl_assert(isShadowAtom(mce,a1));
279   tl_assert(isShadowAtom(mce,a2));
280   return assignNew(mce, Ity_I64, binop(Iop_And64, a1, a2));
281}
282
283static IRAtom* mkDifDV128 ( MCEnv* mce, IRAtom* a1, IRAtom* a2 ) {
284   tl_assert(isShadowAtom(mce,a1));
285   tl_assert(isShadowAtom(mce,a2));
286   return assignNew(mce, Ity_V128, binop(Iop_AndV128, a1, a2));
287}
288
289/* --------- Undefined-if-either-undefined --------- */
290
291static IRAtom* mkUifU8 ( MCEnv* mce, IRAtom* a1, IRAtom* a2 ) {
292   tl_assert(isShadowAtom(mce,a1));
293   tl_assert(isShadowAtom(mce,a2));
294   return assignNew(mce, Ity_I8, binop(Iop_Or8, a1, a2));
295}
296
297static IRAtom* mkUifU16 ( MCEnv* mce, IRAtom* a1, IRAtom* a2 ) {
298   tl_assert(isShadowAtom(mce,a1));
299   tl_assert(isShadowAtom(mce,a2));
300   return assignNew(mce, Ity_I16, binop(Iop_Or16, a1, a2));
301}
302
303static IRAtom* mkUifU32 ( MCEnv* mce, IRAtom* a1, IRAtom* a2 ) {
304   tl_assert(isShadowAtom(mce,a1));
305   tl_assert(isShadowAtom(mce,a2));
306   return assignNew(mce, Ity_I32, binop(Iop_Or32, a1, a2));
307}
308
309static IRAtom* mkUifU64 ( MCEnv* mce, IRAtom* a1, IRAtom* a2 ) {
310   tl_assert(isShadowAtom(mce,a1));
311   tl_assert(isShadowAtom(mce,a2));
312   return assignNew(mce, Ity_I64, binop(Iop_Or64, a1, a2));
313}
314
315static IRAtom* mkUifUV128 ( MCEnv* mce, IRAtom* a1, IRAtom* a2 ) {
316   tl_assert(isShadowAtom(mce,a1));
317   tl_assert(isShadowAtom(mce,a2));
318   return assignNew(mce, Ity_V128, binop(Iop_OrV128, a1, a2));
319}
320
321static IRAtom* mkUifU ( MCEnv* mce, IRType vty, IRAtom* a1, IRAtom* a2 ) {
322   switch (vty) {
323      case Ity_I8:   return mkUifU8(mce, a1, a2);
324      case Ity_I16:  return mkUifU16(mce, a1, a2);
325      case Ity_I32:  return mkUifU32(mce, a1, a2);
326      case Ity_I64:  return mkUifU64(mce, a1, a2);
327      case Ity_V128: return mkUifUV128(mce, a1, a2);
328      default:
329         VG_(printf)("\n"); ppIRType(vty); VG_(printf)("\n");
330         VG_(tool_panic)("memcheck:mkUifU");
331   }
332}
333
334/* --------- The Left-family of operations. --------- */
335
336static IRAtom* mkLeft8 ( MCEnv* mce, IRAtom* a1 ) {
337   tl_assert(isShadowAtom(mce,a1));
338   /* It's safe to duplicate a1 since it's only an atom */
339   return assignNew(mce, Ity_I8,
340                    binop(Iop_Or8, a1,
341                          assignNew(mce, Ity_I8,
342                                         unop(Iop_Neg8, a1))));
343}
344
345static IRAtom* mkLeft16 ( MCEnv* mce, IRAtom* a1 ) {
346   tl_assert(isShadowAtom(mce,a1));
347   /* It's safe to duplicate a1 since it's only an atom */
348   return assignNew(mce, Ity_I16,
349                    binop(Iop_Or16, a1,
350                          assignNew(mce, Ity_I16,
351                                         unop(Iop_Neg16, a1))));
352}
353
354static IRAtom* mkLeft32 ( MCEnv* mce, IRAtom* a1 ) {
355   tl_assert(isShadowAtom(mce,a1));
356   /* It's safe to duplicate a1 since it's only an atom */
357   return assignNew(mce, Ity_I32,
358                    binop(Iop_Or32, a1,
359                          assignNew(mce, Ity_I32,
360                                         unop(Iop_Neg32, a1))));
361}
362
363static IRAtom* mkLeft64 ( MCEnv* mce, IRAtom* a1 ) {
364   tl_assert(isShadowAtom(mce,a1));
365   /* It's safe to duplicate a1 since it's only an atom */
366   return assignNew(mce, Ity_I64,
367                    binop(Iop_Or64, a1,
368                          assignNew(mce, Ity_I64,
369                                         unop(Iop_Neg64, a1))));
370}
371
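/* For illustration: mkLeftN computes (a1 | -a1), which leaves the bits
   below the lowest undefined (1) bit alone and forces every bit at or
   above it to 1 -- it smears undefinedness "leftwards", towards the
   more significant bits, as a cheap over-approximation of how carries
   can spread undefinedness in the Add/Sub/Mul cases below.  E.g.
   (8-bit case):

      mkLeft8(0x04) : 0x04 | (-0x04 & 0xFF) = 0x04 | 0xFC = 0xFC
*/
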
372/* --------- 'Improvement' functions for AND/OR. --------- */
373
374/* ImproveAND(data, vbits) = data OR vbits.  Defined (0) data 0s give
375   defined (0); all other -> undefined (1).
376*/
377static IRAtom* mkImproveAND8 ( MCEnv* mce, IRAtom* data, IRAtom* vbits )
378{
379   tl_assert(isOriginalAtom(mce, data));
380   tl_assert(isShadowAtom(mce, vbits));
381   tl_assert(sameKindedAtoms(data, vbits));
382   return assignNew(mce, Ity_I8, binop(Iop_Or8, data, vbits));
383}
384
385static IRAtom* mkImproveAND16 ( MCEnv* mce, IRAtom* data, IRAtom* vbits )
386{
387   tl_assert(isOriginalAtom(mce, data));
388   tl_assert(isShadowAtom(mce, vbits));
389   tl_assert(sameKindedAtoms(data, vbits));
390   return assignNew(mce, Ity_I16, binop(Iop_Or16, data, vbits));
391}
392
393static IRAtom* mkImproveAND32 ( MCEnv* mce, IRAtom* data, IRAtom* vbits )
394{
395   tl_assert(isOriginalAtom(mce, data));
396   tl_assert(isShadowAtom(mce, vbits));
397   tl_assert(sameKindedAtoms(data, vbits));
398   return assignNew(mce, Ity_I32, binop(Iop_Or32, data, vbits));
399}
400
401static IRAtom* mkImproveAND64 ( MCEnv* mce, IRAtom* data, IRAtom* vbits )
402{
403   tl_assert(isOriginalAtom(mce, data));
404   tl_assert(isShadowAtom(mce, vbits));
405   tl_assert(sameKindedAtoms(data, vbits));
406   return assignNew(mce, Ity_I64, binop(Iop_Or64, data, vbits));
407}
408
409static IRAtom* mkImproveANDV128 ( MCEnv* mce, IRAtom* data, IRAtom* vbits )
410{
411   tl_assert(isOriginalAtom(mce, data));
412   tl_assert(isShadowAtom(mce, vbits));
413   tl_assert(sameKindedAtoms(data, vbits));
414   return assignNew(mce, Ity_V128, binop(Iop_OrV128, data, vbits));
415}
416
417/* ImproveOR(data, vbits) = ~data OR vbits.  Defined (0) data 1s give
418   defined (0); all other -> undefined (1).
419*/
420static IRAtom* mkImproveOR8 ( MCEnv* mce, IRAtom* data, IRAtom* vbits )
421{
422   tl_assert(isOriginalAtom(mce, data));
423   tl_assert(isShadowAtom(mce, vbits));
424   tl_assert(sameKindedAtoms(data, vbits));
425   return assignNew(
426             mce, Ity_I8,
427             binop(Iop_Or8,
428                   assignNew(mce, Ity_I8, unop(Iop_Not8, data)),
429                   vbits) );
430}
431
432static IRAtom* mkImproveOR16 ( MCEnv* mce, IRAtom* data, IRAtom* vbits )
433{
434   tl_assert(isOriginalAtom(mce, data));
435   tl_assert(isShadowAtom(mce, vbits));
436   tl_assert(sameKindedAtoms(data, vbits));
437   return assignNew(
438             mce, Ity_I16,
439             binop(Iop_Or16,
440                   assignNew(mce, Ity_I16, unop(Iop_Not16, data)),
441                   vbits) );
442}
443
444static IRAtom* mkImproveOR32 ( MCEnv* mce, IRAtom* data, IRAtom* vbits )
445{
446   tl_assert(isOriginalAtom(mce, data));
447   tl_assert(isShadowAtom(mce, vbits));
448   tl_assert(sameKindedAtoms(data, vbits));
449   return assignNew(
450             mce, Ity_I32,
451             binop(Iop_Or32,
452                   assignNew(mce, Ity_I32, unop(Iop_Not32, data)),
453                   vbits) );
454}
455
456static IRAtom* mkImproveOR64 ( MCEnv* mce, IRAtom* data, IRAtom* vbits )
457{
458   tl_assert(isOriginalAtom(mce, data));
459   tl_assert(isShadowAtom(mce, vbits));
460   tl_assert(sameKindedAtoms(data, vbits));
461   return assignNew(
462             mce, Ity_I64,
463             binop(Iop_Or64,
464                   assignNew(mce, Ity_I64, unop(Iop_Not64, data)),
465                   vbits) );
466}
467
468static IRAtom* mkImproveORV128 ( MCEnv* mce, IRAtom* data, IRAtom* vbits )
469{
470   tl_assert(isOriginalAtom(mce, data));
471   tl_assert(isShadowAtom(mce, vbits));
472   tl_assert(sameKindedAtoms(data, vbits));
473   return assignNew(
474             mce, Ity_V128,
475             binop(Iop_OrV128,
476                   assignNew(mce, Ity_V128, unop(Iop_NotV128, data)),
477                   vbits) );
478}
479
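/* For illustration: these "improvement" terms exploit the fact that
   for And, a *defined* 0 bit in either operand forces that result bit
   to a defined 0, and for Or, a defined 1 bit does likewise.
   ImproveAND(data,vbits) = data|vbits therefore has a 0 (defined)
   exactly where data has a defined 0 bit; the caller is expected to
   DifD (AND) such a term with the plain UifU result, in the same way
   that expensiveCmpEQorNE below DifDs its improvement term onto the
   naive result.  E.g. (8-bit And, second operand wholly undefined):

      data = 0x0F with vbits 0x00; other operand's vbits = 0xFF
      naive UifU             = 0x00 | 0xFF = 0xFF   (all undefined)
      ImproveAND(0x0F, 0x00) = 0x0F                 (high nibble defined)
      0xFF & 0x0F            = 0x0F                 (high nibble of the
                                                     result is a known 0)
*/
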
480/* --------- Pessimising casts. --------- */
481
482static IRAtom* mkPCastTo( MCEnv* mce, IRType dst_ty, IRAtom* vbits )
483{
484   IRType  ty;
485   IRAtom* tmp1;
486   /* Note, dst_ty is a shadow type, not an original type. */
487   /* First of all, collapse vbits down to a single bit. */
488   tl_assert(isShadowAtom(mce,vbits));
489   ty   = typeOfIRExpr(mce->bb->tyenv, vbits);
490   tmp1 = NULL;
491   switch (ty) {
492      case Ity_I1:
493         tmp1 = vbits;
494         break;
495      case Ity_I8:
496         tmp1 = assignNew(mce, Ity_I1, unop(Iop_CmpNEZ8, vbits));
497         break;
498      case Ity_I16:
499         tmp1 = assignNew(mce, Ity_I1, unop(Iop_CmpNEZ16, vbits));
500         break;
501      case Ity_I32:
502         tmp1 = assignNew(mce, Ity_I1, unop(Iop_CmpNEZ32, vbits));
503         break;
504      case Ity_I64:
505         tmp1 = assignNew(mce, Ity_I1, unop(Iop_CmpNEZ64, vbits));
506         break;
507      case Ity_I128: {
508         /* Gah.  Chop it in half, OR the halves together, and compare
509            that with zero. */
510         IRAtom* tmp2 = assignNew(mce, Ity_I64, unop(Iop_128HIto64, vbits));
511         IRAtom* tmp3 = assignNew(mce, Ity_I64, unop(Iop_128to64, vbits));
512         IRAtom* tmp4 = assignNew(mce, Ity_I64, binop(Iop_Or64, tmp2, tmp3));
513         tmp1         = assignNew(mce, Ity_I1,
514                                       unop(Iop_CmpNEZ64, tmp4));
515         break;
516      }
517      default:
518         ppIRType(ty);
519         VG_(tool_panic)("mkPCastTo(1)");
520   }
521   tl_assert(tmp1);
522   /* Now widen up to the dst type. */
523   switch (dst_ty) {
524      case Ity_I1:
525         return tmp1;
526      case Ity_I8:
527         return assignNew(mce, Ity_I8, unop(Iop_1Sto8, tmp1));
528      case Ity_I16:
529         return assignNew(mce, Ity_I16, unop(Iop_1Sto16, tmp1));
530      case Ity_I32:
531         return assignNew(mce, Ity_I32, unop(Iop_1Sto32, tmp1));
532      case Ity_I64:
533         return assignNew(mce, Ity_I64, unop(Iop_1Sto64, tmp1));
534      case Ity_V128:
535         tmp1 = assignNew(mce, Ity_I64,  unop(Iop_1Sto64, tmp1));
536         tmp1 = assignNew(mce, Ity_V128, binop(Iop_64HLtoV128, tmp1, tmp1));
537         return tmp1;
538      case Ity_I128:
539         tmp1 = assignNew(mce, Ity_I64,  unop(Iop_1Sto64, tmp1));
540         tmp1 = assignNew(mce, Ity_I128, binop(Iop_64HLto128, tmp1, tmp1));
541         return tmp1;
542      default:
543         ppIRType(dst_ty);
544         VG_(tool_panic)("mkPCastTo(2)");
545   }
546}
547
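/* For illustration: mkPCastTo collapses a shadow value to a single
   "any bit undefined?" bit and then smears that bit across the
   destination type, so the result is either all-0s (entirely defined)
   or all-1s (treated as entirely undefined).  E.g.

      mkPCastTo(Ity_I32, vbits8 = 0x00)  -->  0x00000000
      mkPCastTo(Ity_I32, vbits8 = 0x20)  -->  0xFFFFFFFF
*/
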
548/* --------- Accurate interpretation of CmpEQ/CmpNE. --------- */
549/*
550   Normally, we can do CmpEQ/CmpNE by doing UifU on the arguments, and
551   PCasting to Ity_I1.  However, sometimes it is necessary to be more
552   accurate.  The insight is that the result is defined if two
553   corresponding bits can be found, one from each argument, so that
554   both bits are defined but are different -- that makes EQ say "No"
555   and NE say "Yes".  Hence, we compute an improvement term and DifD
556   it onto the "normal" (UifU) result.
557
558   The result is:
559
560   PCastTo<1> (
561      PCastTo<sz>( UifU<sz>(vxx, vyy) )  -- naive version
562      `DifD<sz>`
563      PCastTo<sz>( CmpEQ<sz>( vec, 1....1 ) ) -- improvement term
564   )
565   where
566     vec contains 0 (defined) bits where the corresponding arg bits
567     are defined but different, and 1 bits otherwise:
568
569     vec = UifU<sz>( UifU<sz>(vxx, vyy), Not<sz>(Xor<sz>( xx, yy )) )
570*/
571static IRAtom* expensiveCmpEQorNE ( MCEnv*  mce,
572                                    IRType  ty,
573                                    IRAtom* vxx, IRAtom* vyy,
574                                    IRAtom* xx,  IRAtom* yy )
575{
576   IRAtom *naive, *vec, *vec_cmpd, *improved, *final_cast, *top;
577   IROp   opDIFD, opUIFU, opXOR, opNOT, opCMP;
578
579   tl_assert(isShadowAtom(mce,vxx));
580   tl_assert(isShadowAtom(mce,vyy));
581   tl_assert(isOriginalAtom(mce,xx));
582   tl_assert(isOriginalAtom(mce,yy));
583   tl_assert(sameKindedAtoms(vxx,xx));
584   tl_assert(sameKindedAtoms(vyy,yy));
585
586   switch (ty) {
587      case Ity_I32:
588         opDIFD = Iop_And32;
589         opUIFU = Iop_Or32;
590         opNOT  = Iop_Not32;
591         opXOR  = Iop_Xor32;
592         opCMP  = Iop_CmpEQ32;
593         top    = mkU32(0xFFFFFFFF);
594         break;
595      case Ity_I64:
596         opDIFD = Iop_And64;
597         opUIFU = Iop_Or64;
598         opNOT  = Iop_Not64;
599         opXOR  = Iop_Xor64;
600         opCMP  = Iop_CmpEQ64;
601         top    = mkU64(0xFFFFFFFFFFFFFFFFULL);
602         break;
603      default:
604         VG_(tool_panic)("expensiveCmpEQorNE");
605   }
606
607   naive
608      = mkPCastTo(mce,ty, assignNew(mce, ty, binop(opUIFU, vxx, vyy)));
609
610   vec
611      = assignNew(
612           mce,ty,
613           binop( opUIFU,
614                  assignNew(mce,ty, binop(opUIFU, vxx, vyy)),
615                  assignNew(
616                     mce,ty,
617                     unop( opNOT,
618                           assignNew(mce,ty, binop(opXOR, xx, yy))))));
619
620   vec_cmpd
621      = mkPCastTo( mce,ty, assignNew(mce,Ity_I1, binop(opCMP, vec, top)));
622
623   improved
624      = assignNew( mce,ty, binop(opDIFD, naive, vec_cmpd) );
625
626   final_cast
627      = mkPCastTo( mce, Ity_I1, improved );
628
629   return final_cast;
630}
631
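/* A worked example of the above, for Ity_I32 (illustrative values):
   let xx = 0x00000004 with vxx = 0xFFFF0000 (top half undefined) and
   yy = 0x00000000 with vyy = 0x00000000 (fully defined).  Bit 2 is
   defined in both operands and differs, so the comparison result is
   knowable despite xx's undefined bits:

      naive    = PCast32(vxx UifU vyy) = PCast32(0xFFFF0000) = 0xFFFFFFFF
      vec      = 0xFFFF0000 | ~(0x4 ^ 0x0) = 0xFFFFFFFB   (bit 2 is 0)
      vec_cmpd = PCast32(CmpEQ32(vec, 0xFFFFFFFF)) = 0x00000000
      improved = naive & vec_cmpd = 0x00000000
      final    = PCast1(improved) = 0   -- i.e. the result is defined.
*/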
632
633/*------------------------------------------------------------*/
634/*--- Emit a test and complaint if something is undefined. ---*/
635/*------------------------------------------------------------*/
636
637/* Set the annotations on a dirty helper to indicate that the stack
638   pointer and instruction pointer might be read.  This is the
639   behaviour of all 'emit-a-complaint' style functions we might
640   call. */
641
642static void setHelperAnns ( MCEnv* mce, IRDirty* di ) {
643   di->nFxState = 2;
644   di->fxState[0].fx     = Ifx_Read;
645   di->fxState[0].offset = mce->layout->offset_SP;
646   di->fxState[0].size   = mce->layout->sizeof_SP;
647   di->fxState[1].fx     = Ifx_Read;
648   di->fxState[1].offset = mce->layout->offset_IP;
649   di->fxState[1].size   = mce->layout->sizeof_IP;
650}
651
652
653/* Check the supplied **original** atom for undefinedness, and emit a
654   complaint if so.  Once that happens, mark it as defined.  This is
655   possible because the atom is either a tmp or literal.  If it's a
656   tmp, it will be shadowed by a tmp, and so we can set the shadow to
657   be defined.  In fact as mentioned above, we will have to allocate a
658   new tmp to carry the new 'defined' shadow value, and update the
659   original->tmp mapping accordingly; we cannot simply assign a new
660   value to an existing shadow tmp as this breaks SSAness -- resulting
661   in the post-instrumentation sanity checker spluttering in disapproval.
662*/
663static void complainIfUndefined ( MCEnv* mce, IRAtom* atom )
664{
665   IRAtom*  vatom;
666   IRType   ty;
667   Int      sz;
668   IRDirty* di;
669   IRAtom*  cond;
670
671   /* Since the original expression is atomic, there's no duplicated
672      work generated by making multiple V-expressions for it.  So we
673      don't really care about the possibility that someone else may
674      also create a V-interpretation for it. */
675   tl_assert(isOriginalAtom(mce, atom));
676   vatom = expr2vbits( mce, atom );
677   tl_assert(isShadowAtom(mce, vatom));
678   tl_assert(sameKindedAtoms(atom, vatom));
679
680   ty = typeOfIRExpr(mce->bb->tyenv, vatom);
681
682   /* sz is only used for constructing the error message */
683   sz = ty==Ity_I1 ? 0 : sizeofIRType(ty);
684
685   cond = mkPCastTo( mce, Ity_I1, vatom );
686   /* cond will be 0 if all defined, and 1 if any not defined. */
687
688   switch (sz) {
689      case 0:
690         di = unsafeIRDirty_0_N( 0/*regparms*/,
691                                 "MC_(helperc_value_check0_fail)",
692                                 &MC_(helperc_value_check0_fail),
693                                 mkIRExprVec_0()
694                               );
695         break;
696      case 1:
697         di = unsafeIRDirty_0_N( 0/*regparms*/,
698                                 "MC_(helperc_value_check1_fail)",
699                                 &MC_(helperc_value_check1_fail),
700                                 mkIRExprVec_0()
701                               );
702         break;
703      case 4:
704         di = unsafeIRDirty_0_N( 0/*regparms*/,
705                                 "MC_(helperc_value_check4_fail)",
706                                 &MC_(helperc_value_check4_fail),
707                                 mkIRExprVec_0()
708                               );
709         break;
710      case 8:
711         di = unsafeIRDirty_0_N( 0/*regparms*/,
712                                 "MC_(helperc_value_check8_fail)",
713                                 &MC_(helperc_value_check8_fail),
714                                 mkIRExprVec_0()
715                               );
716         break;
717      default:
718         di = unsafeIRDirty_0_N( 1/*regparms*/,
719                                 "MC_(helperc_complain_undef)",
720                                 &MC_(helperc_complain_undef),
721                                 mkIRExprVec_1( mkIRExpr_HWord( sz ))
722                               );
723         break;
724   }
725   di->guard = cond;
726   setHelperAnns( mce, di );
727   stmt( mce->bb, IRStmt_Dirty(di));
728
729   /* Set the shadow tmp to be defined.  First, update the
730      orig->shadow tmp mapping to reflect the fact that this shadow is
731      getting a new value. */
732   tl_assert(isIRAtom(vatom));
733   /* sameKindedAtoms ... */
734   if (vatom->tag == Iex_Tmp) {
735      tl_assert(atom->tag == Iex_Tmp);
736      newShadowTmp(mce, atom->Iex.Tmp.tmp);
737      assign(mce->bb, findShadowTmp(mce, atom->Iex.Tmp.tmp),
738                      definedOfType(ty));
739   }
740}
741
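/* In IR terms, complainIfUndefined(mce, a) therefore adds roughly:

      t_cond = PCastTo<1>(a#)        -- 1 iff any bit of a is undefined
      if (t_cond) DIRTY call MC_(helperc_value_checkN_fail)
      a# := <all zeroes>             -- if a is a tmp: now considered defined

   (where a# denotes a's shadow, and N is the size in bytes, or 0 for
   Ity_I1).  The last step uses a fresh shadow tmp, as explained above. */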
742
743/*------------------------------------------------------------*/
744/*--- Shadowing PUTs/GETs, and indexed variants thereof    ---*/
745/*------------------------------------------------------------*/
746
747/* Examine the always-defined sections declared in layout to see if
748   the (offset,size) section is within one.  Note, it is an error to
749   partially fall into such a region: (offset,size) should either be
750   completely in such a region or completely not-in such a region.
751*/
752static Bool isAlwaysDefd ( MCEnv* mce, Int offset, Int size )
753{
754   Int minoffD, maxoffD, i;
755   Int minoff = offset;
756   Int maxoff = minoff + size - 1;
757   tl_assert((minoff & ~0xFFFF) == 0);
758   tl_assert((maxoff & ~0xFFFF) == 0);
759
760   for (i = 0; i < mce->layout->n_alwaysDefd; i++) {
761      minoffD = mce->layout->alwaysDefd[i].offset;
762      maxoffD = minoffD + mce->layout->alwaysDefd[i].size - 1;
763      tl_assert((minoffD & ~0xFFFF) == 0);
764      tl_assert((maxoffD & ~0xFFFF) == 0);
765
766      if (maxoff < minoffD || maxoffD < minoff)
767         continue; /* no overlap */
768      if (minoff >= minoffD && maxoff <= maxoffD)
769         return True; /* completely contained in an always-defd section */
770
771      VG_(tool_panic)("memcheck:isAlwaysDefd:partial overlap");
772   }
773   return False; /* could not find any containing section */
774}
775
776
777/* Generate into bb suitable actions to shadow this Put.  If the state
778   slice is marked 'always defined', do nothing.  Otherwise, write the
779   supplied V bits to the shadow state.  We can pass in either an
780   original atom or a V-atom, but not both.  In the former case the
781   relevant V-bits are then generated from the original.
782*/
783static
784void do_shadow_PUT ( MCEnv* mce,  Int offset,
785                     IRAtom* atom, IRAtom* vatom )
786{
787   IRType ty;
788   if (atom) {
789      tl_assert(!vatom);
790      tl_assert(isOriginalAtom(mce, atom));
791      vatom = expr2vbits( mce, atom );
792   } else {
793      tl_assert(vatom);
794      tl_assert(isShadowAtom(mce, vatom));
795   }
796
797   ty = typeOfIRExpr(mce->bb->tyenv, vatom);
798   tl_assert(ty != Ity_I1);
799   if (isAlwaysDefd(mce, offset, sizeofIRType(ty))) {
800      /* later: no ... */
801      /* emit code to emit a complaint if any of the vbits are 1. */
802      /* complainIfUndefined(mce, atom); */
803   } else {
804      /* Do a plain shadow Put. */
805      stmt( mce->bb, IRStmt_Put( offset + mce->layout->total_sizeB, vatom ) );
806   }
807}
808
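/* Note on the offset arithmetic above and below: the code relies on
   the shadow guest state being laid out as a second copy of the guest
   state, starting layout->total_sizeB bytes after the real one, so the
   V bits for the slice at offset k live at offset k + total_sizeB.
   The GETI/PUTI cases build a correspondingly shifted array
   descriptor. */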
809
810/* Generate into bb suitable actions to shadow this PutI.  If the
811   state slice is marked 'always defined', do nothing; otherwise
812   write the V bits for the data to the shadow state. */
813static
814void do_shadow_PUTI ( MCEnv* mce,
815                      IRArray* descr, IRAtom* ix, Int bias, IRAtom* atom )
816{
817   IRAtom* vatom;
818   IRType  ty, tyS;
819   Int     arrSize;
820
821   tl_assert(isOriginalAtom(mce,atom));
822   vatom = expr2vbits( mce, atom );
823   tl_assert(sameKindedAtoms(atom, vatom));
824   ty   = descr->elemTy;
825   tyS  = shadowType(ty);
826   arrSize = descr->nElems * sizeofIRType(ty);
827   tl_assert(ty != Ity_I1);
828   tl_assert(isOriginalAtom(mce,ix));
829   complainIfUndefined(mce,ix);
830   if (isAlwaysDefd(mce, descr->base, arrSize)) {
831      /* later: no ... */
832      /* emit code to emit a complaint if any of the vbits are 1. */
833      /* complainIfUndefined(mce, atom); */
834   } else {
835      /* Do a cloned version of the Put that refers to the shadow
836         area. */
837      IRArray* new_descr
838         = mkIRArray( descr->base + mce->layout->total_sizeB,
839                      tyS, descr->nElems);
840      stmt( mce->bb, IRStmt_PutI( new_descr, ix, bias, vatom ));
841   }
842}
843
844
845/* Return an expression which contains the V bits corresponding to the
846   given GET (passed in in pieces).
847*/
848static
849IRExpr* shadow_GET ( MCEnv* mce, Int offset, IRType ty )
850{
851   IRType tyS = shadowType(ty);
852   tl_assert(ty != Ity_I1);
853   if (isAlwaysDefd(mce, offset, sizeofIRType(ty))) {
854      /* Always defined, return all zeroes of the relevant type */
855      return definedOfType(tyS);
856   } else {
857      /* return a cloned version of the Get that refers to the shadow
858         area. */
859      return IRExpr_Get( offset + mce->layout->total_sizeB, tyS );
860   }
861}
862
863
864/* Return an expression which contains the V bits corresponding to the
865   given GETI (passed in in pieces).
866*/
867static
868IRExpr* shadow_GETI ( MCEnv* mce, IRArray* descr, IRAtom* ix, Int bias )
869{
870   IRType ty   = descr->elemTy;
871   IRType tyS  = shadowType(ty);
872   Int arrSize = descr->nElems * sizeofIRType(ty);
873   tl_assert(ty != Ity_I1);
874   tl_assert(isOriginalAtom(mce,ix));
875   complainIfUndefined(mce,ix);
876   if (isAlwaysDefd(mce, descr->base, arrSize)) {
877      /* Always defined, return all zeroes of the relevant type */
878      return definedOfType(tyS);
879   } else {
880      /* return a cloned version of the Get that refers to the shadow
881         area. */
882      IRArray* new_descr
883         = mkIRArray( descr->base + mce->layout->total_sizeB,
884                      tyS, descr->nElems);
885      return IRExpr_GetI( new_descr, ix, bias );
886   }
887}
888
889
890/*------------------------------------------------------------*/
891/*--- Generating approximations for unknown operations,    ---*/
892/*--- using lazy-propagate semantics                       ---*/
893/*------------------------------------------------------------*/
894
895/* Lazy propagation of undefinedness from two values, resulting in the
896   specified shadow type.
897*/
898static
899IRAtom* mkLazy2 ( MCEnv* mce, IRType finalVty, IRAtom* va1, IRAtom* va2 )
900{
901   IRAtom* at;
902   IRType t1 = typeOfIRExpr(mce->bb->tyenv, va1);
903   IRType t2 = typeOfIRExpr(mce->bb->tyenv, va2);
904   tl_assert(isShadowAtom(mce,va1));
905   tl_assert(isShadowAtom(mce,va2));
906
907   /* The general case is inefficient because PCast is an expensive
908      operation.  Here are some special cases which use PCast only
909      once rather than twice. */
910
911   /* I64 x I64 -> I64 */
912   if (t1 == Ity_I64 && t2 == Ity_I64 && finalVty == Ity_I64) {
913      if (0) VG_(printf)("mkLazy2: I64 x I64 -> I64\n");
914      at = mkUifU(mce, Ity_I64, va1, va2);
915      at = mkPCastTo(mce, Ity_I64, at);
916      return at;
917   }
918
919   /* I64 x I64 -> I32 */
920   if (t1 == Ity_I64 && t2 == Ity_I64 && finalVty == Ity_I32) {
921      if (0) VG_(printf)("mkLazy2: I64 x I64 -> I32\n");
922      at = mkUifU(mce, Ity_I64, va1, va2);
923      at = mkPCastTo(mce, Ity_I32, at);
924      return at;
925   }
926
927   if (0) {
928      VG_(printf)("mkLazy2 ");
929      ppIRType(t1);
930      VG_(printf)("_");
931      ppIRType(t2);
932      VG_(printf)("_");
933      ppIRType(finalVty);
934      VG_(printf)("\n");
935   }
936
937   /* General case: force everything via 32-bit intermediaries. */
938   at = mkPCastTo(mce, Ity_I32, va1);
939   at = mkUifU(mce, Ity_I32, at, mkPCastTo(mce, Ity_I32, va2));
940   at = mkPCastTo(mce, finalVty, at);
941   return at;
942}
943
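/* So, for example, mkLazy2(mce, Ity_I16, va1, va2) yields 0x0000 only
   if va1 and va2 are both entirely defined; if either contains any
   undefined bit at all, the whole 16-bit result is marked undefined
   (0xFFFF).  That is the "lazy" over-approximation: no attempt is made
   to track which particular result bits depend on the undefined input
   bits. */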
944
945/* Do the lazy propagation game from a null-terminated vector of
946   atoms.  This is presumably the arguments to a helper call, so the
947   IRCallee info is also supplied in order that we can know which
948   arguments should be ignored (via the .mcx_mask field).
949*/
950static
951IRAtom* mkLazyN ( MCEnv* mce,
952                  IRAtom** exprvec, IRType finalVtype, IRCallee* cee )
953{
954   Int i;
955   IRAtom* here;
956   IRAtom* curr = definedOfType(Ity_I32);
957   for (i = 0; exprvec[i]; i++) {
958      tl_assert(i < 32);
959      tl_assert(isOriginalAtom(mce, exprvec[i]));
960      /* Only take notice of this arg if the callee's mc-exclusion
961         mask does not say it is to be excluded. */
962      if (cee->mcx_mask & (1<<i)) {
963         /* the arg is to be excluded from definedness checking.  Do
964            nothing. */
965         if (0) VG_(printf)("excluding %s(%d)\n", cee->name, i);
966      } else {
967         /* calculate the arg's definedness, and pessimistically merge
968            it in. */
969         here = mkPCastTo( mce, Ity_I32, expr2vbits(mce, exprvec[i]) );
970         curr = mkUifU32(mce, here, curr);
971      }
972   }
973   return mkPCastTo(mce, finalVtype, curr );
974}
975
976
977/*------------------------------------------------------------*/
978/*--- Generating expensive sequences for exact carry-chain ---*/
979/*--- propagation in add/sub and related operations.       ---*/
980/*------------------------------------------------------------*/
981
982static
983IRAtom* expensiveAddSub ( MCEnv*  mce,
984                          Bool    add,
985                          IRType  ty,
986                          IRAtom* qaa, IRAtom* qbb,
987                          IRAtom* aa,  IRAtom* bb )
988{
989   IRAtom *a_min, *b_min, *a_max, *b_max;
990   IROp   opAND, opOR, opXOR, opNOT, opADD, opSUB;
991
992   tl_assert(isShadowAtom(mce,qaa));
993   tl_assert(isShadowAtom(mce,qbb));
994   tl_assert(isOriginalAtom(mce,aa));
995   tl_assert(isOriginalAtom(mce,bb));
996   tl_assert(sameKindedAtoms(qaa,aa));
997   tl_assert(sameKindedAtoms(qbb,bb));
998
999   switch (ty) {
1000      case Ity_I32:
1001         opAND = Iop_And32;
1002         opOR  = Iop_Or32;
1003         opXOR = Iop_Xor32;
1004         opNOT = Iop_Not32;
1005         opADD = Iop_Add32;
1006         opSUB = Iop_Sub32;
1007         break;
1008      default:
1009         VG_(tool_panic)("expensiveAddSub");
1010   }
1011
1012   // a_min = aa & ~qaa
1013   a_min = assignNew(mce,ty,
1014                     binop(opAND, aa,
1015                                  assignNew(mce,ty, unop(opNOT, qaa))));
1016
1017   // b_min = bb & ~qbb
1018   b_min = assignNew(mce,ty,
1019                     binop(opAND, bb,
1020                                  assignNew(mce,ty, unop(opNOT, qbb))));
1021
1022   // a_max = aa | qaa
1023   a_max = assignNew(mce,ty, binop(opOR, aa, qaa));
1024
1025   // b_max = bb | qbb
1026   b_max = assignNew(mce,ty, binop(opOR, bb, qbb));
1027
1028   if (add) {
1029      // result = (qaa | qbb) | ((a_min + b_min) ^ (a_max + b_max))
1030      return
1031      assignNew(mce,ty,
1032         binop( opOR,
1033                assignNew(mce,ty, binop(opOR, qaa, qbb)),
1034                assignNew(mce,ty,
1035                   binop( opXOR,
1036                          assignNew(mce,ty, binop(opADD, a_min, b_min)),
1037                          assignNew(mce,ty, binop(opADD, a_max, b_max))
1038                   )
1039                )
1040         )
1041      );
1042   } else {
1043      // result = (qaa | qbb) | ((a_min - b_max) ^ (a_max - b_min))
1044      return
1045      assignNew(mce,ty,
1046         binop( opOR,
1047                assignNew(mce,ty, binop(opOR, qaa, qbb)),
1048                assignNew(mce,ty,
1049                   binop( opXOR,
1050                          assignNew(mce,ty, binop(opSUB, a_min, b_max)),
1051                          assignNew(mce,ty, binop(opSUB, a_max, b_min))
1052                   )
1053                )
1054         )
1055      );
1056   }
1057
1058}
1059
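/* A worked example (Ity_I32, add, illustrative values): let aa = 5
   with qaa = 0x2 (bit 1 undefined) and bb = 1 with qbb = 0 (defined).
   Then

      a_min = 5 & ~0x2 = 5,   a_max = 5 | 0x2 = 7
      b_min = b_max    = 1
      (a_min + b_min) ^ (a_max + b_max) = 6 ^ 8 = 0xE
      result = (0x2 | 0x0) | 0xE = 0xE

   i.e. bits 1..3 of the sum are flagged undefined -- the bits that a
   carry out of the undefined bit 1 could influence -- while the rest
   of the word stays defined, unlike the cheap mkLeft scheme. */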
1060
1061/*------------------------------------------------------------*/
1062/*--- Helpers for dealing with vector primops.            ---*/
1063/*------------------------------------------------------------*/
1064
1065/* Vector pessimisation -- pessimise within each lane individually. */
1066
1067static IRAtom* mkPCast8x16 ( MCEnv* mce, IRAtom* at )
1068{
1069   return assignNew(mce, Ity_V128, unop(Iop_CmpNEZ8x16, at));
1070}
1071
1072static IRAtom* mkPCast16x8 ( MCEnv* mce, IRAtom* at )
1073{
1074   return assignNew(mce, Ity_V128, unop(Iop_CmpNEZ16x8, at));
1075}
1076
1077static IRAtom* mkPCast32x4 ( MCEnv* mce, IRAtom* at )
1078{
1079   return assignNew(mce, Ity_V128, unop(Iop_CmpNEZ32x4, at));
1080}
1081
1082static IRAtom* mkPCast64x2 ( MCEnv* mce, IRAtom* at )
1083{
1084   return assignNew(mce, Ity_V128, unop(Iop_CmpNEZ64x2, at));
1085}
1086
1087static IRAtom* mkPCast32x2 ( MCEnv* mce, IRAtom* at )
1088{
1089   return assignNew(mce, Ity_I64, unop(Iop_CmpNEZ32x2, at));
1090}
1091
1092static IRAtom* mkPCast16x4 ( MCEnv* mce, IRAtom* at )
1093{
1094   return assignNew(mce, Ity_I64, unop(Iop_CmpNEZ16x4, at));
1095}
1096
1097static IRAtom* mkPCast8x8 ( MCEnv* mce, IRAtom* at )
1098{
1099   return assignNew(mce, Ity_I64, unop(Iop_CmpNEZ8x8, at));
1100}
1101
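/* For illustration: these per-lane pessimisations turn each lane into
   all-0s if the lane was entirely defined and all-1s otherwise, e.g.
   for mkPCast16x4 acting on a 64-bit shadow value:

      0x0000_0001_0000_8000  -->  0x0000_FFFF_0000_FFFF

   so undefinedness is confined to the lanes in which it actually
   occurs, rather than poisoning the whole vector. */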
1102
1103/* Here's a simple scheme capable of handling ops derived from SSE1
1104   code while only generating ops that can be efficiently
1105   implemented in SSE1. */
1106
1107/* All-lanes versions are straightforward:
1108
1109   binary32Fx4(x,y)   ==> PCast32x4(UifUV128(x#,y#))
1110
1111   unary32Fx4(x)      ==> PCast32x4(x#)
1112
1113   Lowest-lane-only versions are more complex:
1114
1115   binary32F0x4(x,y)  ==> SetV128lo32(
1116                             x#,
1117                             PCast32(V128to32(UifUV128(x#,y#)))
1118                          )
1119
1120   This is perhaps not so obvious.  In particular, it's faster to
1121   do a V128-bit UifU and then take the bottom 32 bits than the more
1122   obvious scheme of taking the bottom 32 bits of each operand
1123   and doing a 32-bit UifU.  Basically since UifU is fast and
1124   chopping lanes off vector values is slow.
1125
1126   Finally:
1127
1128   unary32F0x4(x)     ==> SetV128lo32(
1129                             x#,
1130                             PCast32(V128to32(x#))
1131                          )
1132
1133   Where:
1134
1135   PCast32(v#)   = 1Sto32(CmpNE32(v#,0))
1136   PCast32x4(v#) = CmpNEZ32x4(v#)
1137*/
1138
1139static
1140IRAtom* binary32Fx4 ( MCEnv* mce, IRAtom* vatomX, IRAtom* vatomY )
1141{
1142   IRAtom* at;
1143   tl_assert(isShadowAtom(mce, vatomX));
1144   tl_assert(isShadowAtom(mce, vatomY));
1145   at = mkUifUV128(mce, vatomX, vatomY);
1146   at = assignNew(mce, Ity_V128, mkPCast32x4(mce, at));
1147   return at;
1148}
1149
1150static
1151IRAtom* unary32Fx4 ( MCEnv* mce, IRAtom* vatomX )
1152{
1153   IRAtom* at;
1154   tl_assert(isShadowAtom(mce, vatomX));
1155   at = assignNew(mce, Ity_V128, mkPCast32x4(mce, vatomX));
1156   return at;
1157}
1158
1159static
1160IRAtom* binary32F0x4 ( MCEnv* mce, IRAtom* vatomX, IRAtom* vatomY )
1161{
1162   IRAtom* at;
1163   tl_assert(isShadowAtom(mce, vatomX));
1164   tl_assert(isShadowAtom(mce, vatomY));
1165   at = mkUifUV128(mce, vatomX, vatomY);
1166   at = assignNew(mce, Ity_I32, unop(Iop_V128to32, at));
1167   at = mkPCastTo(mce, Ity_I32, at);
1168   at = assignNew(mce, Ity_V128, binop(Iop_SetV128lo32, vatomX, at));
1169   return at;
1170}
1171
1172static
1173IRAtom* unary32F0x4 ( MCEnv* mce, IRAtom* vatomX )
1174{
1175   IRAtom* at;
1176   tl_assert(isShadowAtom(mce, vatomX));
1177   at = assignNew(mce, Ity_I32, unop(Iop_V128to32, vatomX));
1178   at = mkPCastTo(mce, Ity_I32, at);
1179   at = assignNew(mce, Ity_V128, binop(Iop_SetV128lo32, vatomX, at));
1180   return at;
1181}
1182
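/* So for a lowest-lane-only op such as Add32F0x4, only the bottom
   32-bit lane of the result shadow reflects the operation's
   definedness; the upper three lanes of the shadow are simply copied
   from the first operand's shadow, mirroring what the instruction
   itself does with the data. */
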
1183/* --- ... and ... 64Fx2 versions of the same ... --- */
1184
1185static
1186IRAtom* binary64Fx2 ( MCEnv* mce, IRAtom* vatomX, IRAtom* vatomY )
1187{
1188   IRAtom* at;
1189   tl_assert(isShadowAtom(mce, vatomX));
1190   tl_assert(isShadowAtom(mce, vatomY));
1191   at = mkUifUV128(mce, vatomX, vatomY);
1192   at = assignNew(mce, Ity_V128, mkPCast64x2(mce, at));
1193   return at;
1194}
1195
1196static
1197IRAtom* unary64Fx2 ( MCEnv* mce, IRAtom* vatomX )
1198{
1199   IRAtom* at;
1200   tl_assert(isShadowAtom(mce, vatomX));
1201   at = assignNew(mce, Ity_V128, mkPCast64x2(mce, vatomX));
1202   return at;
1203}
1204
1205static
1206IRAtom* binary64F0x2 ( MCEnv* mce, IRAtom* vatomX, IRAtom* vatomY )
1207{
1208   IRAtom* at;
1209   tl_assert(isShadowAtom(mce, vatomX));
1210   tl_assert(isShadowAtom(mce, vatomY));
1211   at = mkUifUV128(mce, vatomX, vatomY);
1212   at = assignNew(mce, Ity_I64, unop(Iop_V128to64, at));
1213   at = mkPCastTo(mce, Ity_I64, at);
1214   at = assignNew(mce, Ity_V128, binop(Iop_SetV128lo64, vatomX, at));
1215   return at;
1216}
1217
1218static
1219IRAtom* unary64F0x2 ( MCEnv* mce, IRAtom* vatomX )
1220{
1221   IRAtom* at;
1222   tl_assert(isShadowAtom(mce, vatomX));
1223   at = assignNew(mce, Ity_I64, unop(Iop_V128to64, vatomX));
1224   at = mkPCastTo(mce, Ity_I64, at);
1225   at = assignNew(mce, Ity_V128, binop(Iop_SetV128lo64, vatomX, at));
1226   return at;
1227}
1228
1229/* --- --- Vector saturated narrowing --- --- */
1230
1231/* This is quite subtle.  What to do is simple:
1232
1233   Let the original narrowing op be QNarrowW{S,U}xN.  Produce:
1234
1235      the-narrowing-op( PCastWxN(vatom1), PCastWxN(vatom2))
1236
1237   Why this is right is not so simple.  Consider a lane in the args,
1238   vatom1 or 2, doesn't matter.
1239
1240   After the PCast, that lane is all 0s (defined) or all
1241   1s (undefined).
1242
1243   Both signed and unsigned saturating narrowing of all 0s produces
1244   all 0s, which is what we want.
1245
1246   The all-1s case is more complex.  Unsigned narrowing interprets an
1247   all-1s input as the largest unsigned integer, and so produces all
1248   1s as a result since that is the largest unsigned value at the
1249   smaller width.
1250
1251   Signed narrowing interprets all 1s as -1.  Fortunately, -1 narrows
1252   to -1, so we still wind up with all 1s at the smaller width.
1253
1254   So: In short, pessimise the args, then apply the original narrowing
1255   op.
1256*/
1257static
1258IRAtom* vectorNarrowV128 ( MCEnv* mce, IROp narrow_op,
1259                          IRAtom* vatom1, IRAtom* vatom2)
1260{
1261   IRAtom *at1, *at2, *at3;
1262   IRAtom* (*pcast)( MCEnv*, IRAtom* );
1263   switch (narrow_op) {
1264      case Iop_QNarrow32Sx4: pcast = mkPCast32x4; break;
1265      case Iop_QNarrow16Sx8: pcast = mkPCast16x8; break;
1266      case Iop_QNarrow16Ux8: pcast = mkPCast16x8; break;
1267      default: VG_(tool_panic)("vectorNarrowV128");
1268   }
1269   tl_assert(isShadowAtom(mce,vatom1));
1270   tl_assert(isShadowAtom(mce,vatom2));
1271   at1 = assignNew(mce, Ity_V128, pcast(mce, vatom1));
1272   at2 = assignNew(mce, Ity_V128, pcast(mce, vatom2));
1273   at3 = assignNew(mce, Ity_V128, binop(narrow_op, at1, at2));
1274   return at3;
1275}
1276
1277static
1278IRAtom* vectorNarrow64 ( MCEnv* mce, IROp narrow_op,
1279                         IRAtom* vatom1, IRAtom* vatom2)
1280{
1281   IRAtom *at1, *at2, *at3;
1282   IRAtom* (*pcast)( MCEnv*, IRAtom* );
1283   switch (narrow_op) {
1284      case Iop_QNarrow32Sx2: pcast = mkPCast32x2; break;
1285      case Iop_QNarrow16Sx4: pcast = mkPCast16x4; break;
1286      case Iop_QNarrow16Ux4: pcast = mkPCast16x4; break;
1287      default: VG_(tool_panic)("vectorNarrow64");
1288   }
1289   tl_assert(isShadowAtom(mce,vatom1));
1290   tl_assert(isShadowAtom(mce,vatom2));
1291   at1 = assignNew(mce, Ity_I64, pcast(mce, vatom1));
1292   at2 = assignNew(mce, Ity_I64, pcast(mce, vatom2));
1293   at3 = assignNew(mce, Ity_I64, binop(narrow_op, at1, at2));
1294   return at3;
1295}
1296
1297
1298/* --- --- Vector integer arithmetic --- --- */
1299
1300/* Simple ... UifU the args and per-lane pessimise the results. */
1301
1302/* --- V128-bit versions --- */
1303
1304static
1305IRAtom* binary8Ix16 ( MCEnv* mce, IRAtom* vatom1, IRAtom* vatom2 )
1306{
1307   IRAtom* at;
1308   at = mkUifUV128(mce, vatom1, vatom2);
1309   at = mkPCast8x16(mce, at);
1310   return at;
1311}
1312
1313static
1314IRAtom* binary16Ix8 ( MCEnv* mce, IRAtom* vatom1, IRAtom* vatom2 )
1315{
1316   IRAtom* at;
1317   at = mkUifUV128(mce, vatom1, vatom2);
1318   at = mkPCast16x8(mce, at);
1319   return at;
1320}
1321
1322static
1323IRAtom* binary32Ix4 ( MCEnv* mce, IRAtom* vatom1, IRAtom* vatom2 )
1324{
1325   IRAtom* at;
1326   at = mkUifUV128(mce, vatom1, vatom2);
1327   at = mkPCast32x4(mce, at);
1328   return at;
1329}
1330
1331static
1332IRAtom* binary64Ix2 ( MCEnv* mce, IRAtom* vatom1, IRAtom* vatom2 )
1333{
1334   IRAtom* at;
1335   at = mkUifUV128(mce, vatom1, vatom2);
1336   at = mkPCast64x2(mce, at);
1337   return at;
1338}
1339
1340/* --- 64-bit versions --- */
1341
1342static
1343IRAtom* binary8Ix8 ( MCEnv* mce, IRAtom* vatom1, IRAtom* vatom2 )
1344{
1345   IRAtom* at;
1346   at = mkUifU64(mce, vatom1, vatom2);
1347   at = mkPCast8x8(mce, at);
1348   return at;
1349}
1350
1351static
1352IRAtom* binary16Ix4 ( MCEnv* mce, IRAtom* vatom1, IRAtom* vatom2 )
1353{
1354   IRAtom* at;
1355   at = mkUifU64(mce, vatom1, vatom2);
1356   at = mkPCast16x4(mce, at);
1357   return at;
1358}
1359
1360static
1361IRAtom* binary32Ix2 ( MCEnv* mce, IRAtom* vatom1, IRAtom* vatom2 )
1362{
1363   IRAtom* at;
1364   at = mkUifU64(mce, vatom1, vatom2);
1365   at = mkPCast32x2(mce, at);
1366   return at;
1367}
1368
1369
1370/*------------------------------------------------------------*/
1371/*--- Generate shadow values from all kinds of IRExprs.    ---*/
1372/*------------------------------------------------------------*/
1373
1374static
1375IRAtom* expr2vbits_Binop ( MCEnv* mce,
1376                           IROp op,
1377                           IRAtom* atom1, IRAtom* atom2 )
1378{
1379   IRType  and_or_ty;
1380   IRAtom* (*uifu)    (MCEnv*, IRAtom*, IRAtom*);
1381   IRAtom* (*difd)    (MCEnv*, IRAtom*, IRAtom*);
1382   IRAtom* (*improve) (MCEnv*, IRAtom*, IRAtom*);
1383
1384   IRAtom* vatom1 = expr2vbits( mce, atom1 );
1385   IRAtom* vatom2 = expr2vbits( mce, atom2 );
1386
1387   tl_assert(isOriginalAtom(mce,atom1));
1388   tl_assert(isOriginalAtom(mce,atom2));
1389   tl_assert(isShadowAtom(mce,vatom1));
1390   tl_assert(isShadowAtom(mce,vatom2));
1391   tl_assert(sameKindedAtoms(atom1,vatom1));
1392   tl_assert(sameKindedAtoms(atom2,vatom2));
1393   switch (op) {
1394
1395      /* 64-bit SIMD */
1396
1397      case Iop_ShrN16x4:
1398      case Iop_ShrN32x2:
1399      case Iop_SarN16x4:
1400      case Iop_SarN32x2:
1401      case Iop_ShlN16x4:
1402      case Iop_ShlN32x2:
1403         /* Same scheme as with all other shifts. */
1404         complainIfUndefined(mce, atom2);
1405         return assignNew(mce, Ity_I64, binop(op, vatom1, atom2));
1406
1407      case Iop_QNarrow32Sx2:
1408      case Iop_QNarrow16Sx4:
1409      case Iop_QNarrow16Ux4:
1410         return vectorNarrow64(mce, op, vatom1, vatom2);
1411
1412      case Iop_Min8Ux8:
1413      case Iop_Max8Ux8:
1414      case Iop_Avg8Ux8:
1415      case Iop_QSub8Sx8:
1416      case Iop_QSub8Ux8:
1417      case Iop_Sub8x8:
1418      case Iop_CmpGT8Sx8:
1419      case Iop_CmpEQ8x8:
1420      case Iop_QAdd8Sx8:
1421      case Iop_QAdd8Ux8:
1422      case Iop_Add8x8:
1423         return binary8Ix8(mce, vatom1, vatom2);
1424
1425      case Iop_Min16Sx4:
1426      case Iop_Max16Sx4:
1427      case Iop_Avg16Ux4:
1428      case Iop_QSub16Ux4:
1429      case Iop_QSub16Sx4:
1430      case Iop_Sub16x4:
1431      case Iop_Mul16x4:
1432      case Iop_MulHi16Sx4:
1433      case Iop_MulHi16Ux4:
1434      case Iop_CmpGT16Sx4:
1435      case Iop_CmpEQ16x4:
1436      case Iop_QAdd16Sx4:
1437      case Iop_QAdd16Ux4:
1438      case Iop_Add16x4:
1439         return binary16Ix4(mce, vatom1, vatom2);
1440
1441      case Iop_Sub32x2:
1442      case Iop_CmpGT32Sx2:
1443      case Iop_CmpEQ32x2:
1444      case Iop_Add32x2:
1445         return binary32Ix2(mce, vatom1, vatom2);
1446
1447      /* 64-bit data-steering */
1448      case Iop_InterleaveLO32x2:
1449      case Iop_InterleaveLO16x4:
1450      case Iop_InterleaveLO8x8:
1451      case Iop_InterleaveHI32x2:
1452      case Iop_InterleaveHI16x4:
1453      case Iop_InterleaveHI8x8:
1454         return assignNew(mce, Ity_I64, binop(op, vatom1, vatom2));
1455
1456      /* V128-bit SIMD */
1457
1458      case Iop_ShrN16x8:
1459      case Iop_ShrN32x4:
1460      case Iop_ShrN64x2:
1461      case Iop_SarN16x8:
1462      case Iop_SarN32x4:
1463      case Iop_ShlN16x8:
1464      case Iop_ShlN32x4:
1465      case Iop_ShlN64x2:
1466         /* Same scheme as with all other shifts. */
1467         complainIfUndefined(mce, atom2);
1468         return assignNew(mce, Ity_V128, binop(op, vatom1, atom2));
1469
1470      case Iop_QSub8Ux16:
1471      case Iop_QSub8Sx16:
1472      case Iop_Sub8x16:
1473      case Iop_Min8Ux16:
1474      case Iop_Max8Ux16:
1475      case Iop_CmpGT8Sx16:
1476      case Iop_CmpEQ8x16:
1477      case Iop_Avg8Ux16:
1478      case Iop_QAdd8Ux16:
1479      case Iop_QAdd8Sx16:
1480      case Iop_Add8x16:
1481         return binary8Ix16(mce, vatom1, vatom2);
1482
1483      case Iop_QSub16Ux8:
1484      case Iop_QSub16Sx8:
1485      case Iop_Sub16x8:
1486      case Iop_Mul16x8:
1487      case Iop_MulHi16Sx8:
1488      case Iop_MulHi16Ux8:
1489      case Iop_Min16Sx8:
1490      case Iop_Max16Sx8:
1491      case Iop_CmpGT16Sx8:
1492      case Iop_CmpEQ16x8:
1493      case Iop_Avg16Ux8:
1494      case Iop_QAdd16Ux8:
1495      case Iop_QAdd16Sx8:
1496      case Iop_Add16x8:
1497         return binary16Ix8(mce, vatom1, vatom2);
1498
1499      case Iop_Sub32x4:
1500      case Iop_CmpGT32Sx4:
1501      case Iop_CmpEQ32x4:
1502      case Iop_Add32x4:
1503         return binary32Ix4(mce, vatom1, vatom2);
1504
1505      case Iop_Sub64x2:
1506      case Iop_Add64x2:
1507         return binary64Ix2(mce, vatom1, vatom2);
1508
1509      case Iop_QNarrow32Sx4:
1510      case Iop_QNarrow16Sx8:
1511      case Iop_QNarrow16Ux8:
1512         return vectorNarrowV128(mce, op, vatom1, vatom2);
1513
1514      case Iop_Sub64Fx2:
1515      case Iop_Mul64Fx2:
1516      case Iop_Min64Fx2:
1517      case Iop_Max64Fx2:
1518      case Iop_Div64Fx2:
1519      case Iop_CmpLT64Fx2:
1520      case Iop_CmpLE64Fx2:
1521      case Iop_CmpEQ64Fx2:
1522      case Iop_Add64Fx2:
1523         return binary64Fx2(mce, vatom1, vatom2);
1524
1525      case Iop_Sub64F0x2:
1526      case Iop_Mul64F0x2:
1527      case Iop_Min64F0x2:
1528      case Iop_Max64F0x2:
1529      case Iop_Div64F0x2:
1530      case Iop_CmpLT64F0x2:
1531      case Iop_CmpLE64F0x2:
1532      case Iop_CmpEQ64F0x2:
1533      case Iop_Add64F0x2:
1534         return binary64F0x2(mce, vatom1, vatom2);
1535
1536      case Iop_Sub32Fx4:
1537      case Iop_Mul32Fx4:
1538      case Iop_Min32Fx4:
1539      case Iop_Max32Fx4:
1540      case Iop_Div32Fx4:
1541      case Iop_CmpLT32Fx4:
1542      case Iop_CmpLE32Fx4:
1543      case Iop_CmpEQ32Fx4:
1544      case Iop_Add32Fx4:
1545         return binary32Fx4(mce, vatom1, vatom2);
1546
1547      case Iop_Sub32F0x4:
1548      case Iop_Mul32F0x4:
1549      case Iop_Min32F0x4:
1550      case Iop_Max32F0x4:
1551      case Iop_Div32F0x4:
1552      case Iop_CmpLT32F0x4:
1553      case Iop_CmpLE32F0x4:
1554      case Iop_CmpEQ32F0x4:
1555      case Iop_Add32F0x4:
1556         return binary32F0x4(mce, vatom1, vatom2);
1557
1558      /* V128-bit data-steering */
1559      case Iop_SetV128lo32:
1560      case Iop_SetV128lo64:
1561      case Iop_64HLtoV128:
1562      case Iop_InterleaveLO64x2:
1563      case Iop_InterleaveLO32x4:
1564      case Iop_InterleaveLO16x8:
1565      case Iop_InterleaveLO8x16:
1566      case Iop_InterleaveHI64x2:
1567      case Iop_InterleaveHI32x4:
1568      case Iop_InterleaveHI16x8:
1569      case Iop_InterleaveHI8x16:
1570         return assignNew(mce, Ity_V128, binop(op, vatom1, vatom2));
1571
1572      /* I128-bit data-steering */
1573      case Iop_64HLto128:
1574         return assignNew(mce, Ity_I128, binop(op, vatom1, vatom2));
1575
1576      /* Scalar floating point */
1577
1578      case Iop_RoundF64:
1579      case Iop_F64toI64:
1580      case Iop_I64toF64:
1581         /* First arg is I32 (rounding mode), second is F64 or I64
1582            (data). */
1583         return mkLazy2(mce, Ity_I64, vatom1, vatom2);
1584
1585      case Iop_PRemC3210F64: case Iop_PRem1C3210F64:
1586         /* Takes two F64 args. */
1587      case Iop_F64toI32:
1588      case Iop_F64toF32:
1589         /* First arg is I32 (rounding mode), second is F64 (data). */
1590         return mkLazy2(mce, Ity_I32, vatom1, vatom2);
1591
1592      case Iop_F64toI16:
1593         /* First arg is I32 (rounding mode), second is F64 (data). */
1594         return mkLazy2(mce, Ity_I16, vatom1, vatom2);
1595
1596      case Iop_ScaleF64:
1597      case Iop_Yl2xF64:
1598      case Iop_Yl2xp1F64:
1599      case Iop_PRemF64:
1600      case Iop_PRem1F64:
1601      case Iop_AtanF64:
1602      case Iop_AddF64:
1603      case Iop_DivF64:
1604      case Iop_SubF64:
1605      case Iop_MulF64:
1606         return mkLazy2(mce, Ity_I64, vatom1, vatom2);
1607
1608      case Iop_CmpF64:
1609         return mkLazy2(mce, Ity_I32, vatom1, vatom2);
1610
1611      /* non-FP after here */
1612
1613      case Iop_DivModU64to32:
1614      case Iop_DivModS64to32:
1615         return mkLazy2(mce, Ity_I64, vatom1, vatom2);
1616
1617      case Iop_DivModU128to64:
1618      case Iop_DivModS128to64:
1619         return mkLazy2(mce, Ity_I128, vatom1, vatom2);
1620
1621      case Iop_16HLto32:
1622         return assignNew(mce, Ity_I32, binop(op, vatom1, vatom2));
1623      case Iop_32HLto64:
1624         return assignNew(mce, Ity_I64, binop(op, vatom1, vatom2));
1625
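      /* Widening multiplies: approximate the low half with the cheap
         left-smearing rule (mkLeft of the UifU of the operand V-bits),
         pessimistically make the high half a PCast of that, and then
         reassemble the double-width shadow with the HLto glue op. */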
1626      case Iop_MullS64:
1627      case Iop_MullU64: {
1628         IRAtom* vLo64 = mkLeft64(mce, mkUifU64(mce, vatom1,vatom2));
1629         IRAtom* vHi64 = mkPCastTo(mce, Ity_I64, vLo64);
1630         return assignNew(mce, Ity_I128, binop(Iop_64HLto128, vHi64, vLo64));
1631      }
1632
1633      case Iop_MullS32:
1634      case Iop_MullU32: {
1635         IRAtom* vLo32 = mkLeft32(mce, mkUifU32(mce, vatom1,vatom2));
1636         IRAtom* vHi32 = mkPCastTo(mce, Ity_I32, vLo32);
1637         return assignNew(mce, Ity_I64, binop(Iop_32HLto64, vHi32, vLo32));
1638      }
1639
1640      case Iop_MullS16:
1641      case Iop_MullU16: {
1642         IRAtom* vLo16 = mkLeft16(mce, mkUifU16(mce, vatom1,vatom2));
1643         IRAtom* vHi16 = mkPCastTo(mce, Ity_I16, vLo16);
1644         return assignNew(mce, Ity_I32, binop(Iop_16HLto32, vHi16, vLo16));
1645      }
1646
1647      case Iop_MullS8:
1648      case Iop_MullU8: {
1649         IRAtom* vLo8 = mkLeft8(mce, mkUifU8(mce, vatom1,vatom2));
1650         IRAtom* vHi8 = mkPCastTo(mce, Ity_I8, vLo8);
1651         return assignNew(mce, Ity_I16, binop(Iop_8HLto16, vHi8, vLo8));
1652      }
1653
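      /* Add/Sub: if 'bogus' literals have been seen in this BB, use the
         bit-accurate (expensive) add/sub instrumentation to avoid false
         positives; otherwise fall through to the cheap left-smearing
         approximation shared with Mul32. */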
1654      case Iop_Add32:
1655         if (mce->bogusLiterals)
1656            return expensiveAddSub(mce,True,Ity_I32,
1657                                   vatom1,vatom2, atom1,atom2);
1658         else
1659            goto cheap_AddSub32;
1660      case Iop_Sub32:
1661         if (mce->bogusLiterals)
1662            return expensiveAddSub(mce,False,Ity_I32,
1663                                   vatom1,vatom2, atom1,atom2);
1664         else
1665            goto cheap_AddSub32;
1666
1667      cheap_AddSub32:
1668      case Iop_Mul32:
1669         return mkLeft32(mce, mkUifU32(mce, vatom1,vatom2));
1670
1671      /* could do better: Add64, Sub64 */
1672      case Iop_Mul64:
1673      case Iop_Add64:
1674      case Iop_Sub64:
1675         return mkLeft64(mce, mkUifU64(mce, vatom1,vatom2));
1676
1677      case Iop_Mul16:
1678      case Iop_Add16:
1679      case Iop_Sub16:
1680         return mkLeft16(mce, mkUifU16(mce, vatom1,vatom2));
1681
1682      case Iop_Sub8:
1683      case Iop_Add8:
1684         return mkLeft8(mce, mkUifU8(mce, vatom1,vatom2));
1685
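      /* Equality comparisons get the same treatment: exact (expensive)
         instrumentation when bogus literals are present, otherwise the
         cheap rule that the result is defined iff both operands are
         fully defined. */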
1686      case Iop_CmpEQ64:
1687         if (mce->bogusLiterals)
1688            return expensiveCmpEQorNE(mce,Ity_I64, vatom1,vatom2, atom1,atom2 );
1689         else
1690            goto cheap_cmp64;
1691      cheap_cmp64:
1692      case Iop_CmpLE64S: case Iop_CmpLE64U:
1693      case Iop_CmpLT64U: case Iop_CmpLT64S:
1694      case Iop_CmpNE64:
1695         return mkPCastTo(mce, Ity_I1, mkUifU64(mce, vatom1,vatom2));
1696
1697      case Iop_CmpEQ32:
1698         if (mce->bogusLiterals)
1699            return expensiveCmpEQorNE(mce,Ity_I32, vatom1,vatom2, atom1,atom2 );
1700         else
1701            goto cheap_cmp32;
1702      cheap_cmp32:
1703      case Iop_CmpLE32S: case Iop_CmpLE32U:
1704      case Iop_CmpLT32U: case Iop_CmpLT32S:
1705      case Iop_CmpNE32:
1706         return mkPCastTo(mce, Ity_I1, mkUifU32(mce, vatom1,vatom2));
1707
1708      case Iop_CmpEQ16: case Iop_CmpNE16:
1709         return mkPCastTo(mce, Ity_I1, mkUifU16(mce, vatom1,vatom2));
1710
1711      case Iop_CmpEQ8: case Iop_CmpNE8:
1712         return mkPCastTo(mce, Ity_I1, mkUifU8(mce, vatom1,vatom2));
1713
1714      case Iop_Shl32: case Iop_Shr32: case Iop_Sar32:
1715         /* Complain if the shift amount is undefined.  Then simply
1716            shift the first arg's V bits by the real shift amount. */
1717         complainIfUndefined(mce, atom2);
1718         return assignNew(mce, Ity_I32, binop(op, vatom1, atom2));
1719
1720      case Iop_Shl16: case Iop_Shr16: case Iop_Sar16:
1721         /* Same scheme as with 32-bit shifts. */
1722         complainIfUndefined(mce, atom2);
1723         return assignNew(mce, Ity_I16, binop(op, vatom1, atom2));
1724
1725      case Iop_Shl8: case Iop_Shr8:
1726         /* Same scheme as with 32-bit shifts. */
1727         complainIfUndefined(mce, atom2);
1728         return assignNew(mce, Ity_I8, binop(op, vatom1, atom2));
1729
1730      case Iop_Shl64: case Iop_Shr64: case Iop_Sar64:
1731         /* Same scheme as with 32-bit shifts. */
1732         complainIfUndefined(mce, atom2);
1733         return assignNew(mce, Ity_I64, binop(op, vatom1, atom2));
1734
1735      case Iop_AndV128:
1736         uifu = mkUifUV128; difd = mkDifDV128;
1737         and_or_ty = Ity_V128; improve = mkImproveANDV128; goto do_And_Or;
1738      case Iop_And64:
1739         uifu = mkUifU64; difd = mkDifD64;
1740         and_or_ty = Ity_I64; improve = mkImproveAND64; goto do_And_Or;
1741      case Iop_And32:
1742         uifu = mkUifU32; difd = mkDifD32;
1743         and_or_ty = Ity_I32; improve = mkImproveAND32; goto do_And_Or;
1744      case Iop_And16:
1745         uifu = mkUifU16; difd = mkDifD16;
1746         and_or_ty = Ity_I16; improve = mkImproveAND16; goto do_And_Or;
1747      case Iop_And8:
1748         uifu = mkUifU8; difd = mkDifD8;
1749         and_or_ty = Ity_I8; improve = mkImproveAND8; goto do_And_Or;
1750
1751      case Iop_OrV128:
1752         uifu = mkUifUV128; difd = mkDifDV128;
1753         and_or_ty = Ity_V128; improve = mkImproveORV128; goto do_And_Or;
1754      case Iop_Or64:
1755         uifu = mkUifU64; difd = mkDifD64;
1756         and_or_ty = Ity_I64; improve = mkImproveOR64; goto do_And_Or;
1757      case Iop_Or32:
1758         uifu = mkUifU32; difd = mkDifD32;
1759         and_or_ty = Ity_I32; improve = mkImproveOR32; goto do_And_Or;
1760      case Iop_Or16:
1761         uifu = mkUifU16; difd = mkDifD16;
1762         and_or_ty = Ity_I16; improve = mkImproveOR16; goto do_And_Or;
1763      case Iop_Or8:
1764         uifu = mkUifU8; difd = mkDifD8;
1765         and_or_ty = Ity_I8; improve = mkImproveOR8; goto do_And_Or;
1766
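      /* For And/Or the result can be partially defined even when the
         inputs are not: an AND output bit is certainly defined wherever
         either operand supplies a defined 0, and an OR output bit
         wherever either operand supplies a defined 1.  The improve()
         terms encode exactly that, and DifD folds them into the
         worst-case UifU of the two shadow operands. */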
1767      do_And_Or:
1768         return
1769         assignNew(
1770            mce,
1771            and_or_ty,
1772            difd(mce, uifu(mce, vatom1, vatom2),
1773                      difd(mce, improve(mce, atom1, vatom1),
1774                                improve(mce, atom2, vatom2) ) ) );
1775
1776      case Iop_Xor8:
1777         return mkUifU8(mce, vatom1, vatom2);
1778      case Iop_Xor16:
1779         return mkUifU16(mce, vatom1, vatom2);
1780      case Iop_Xor32:
1781         return mkUifU32(mce, vatom1, vatom2);
1782      case Iop_Xor64:
1783         return mkUifU64(mce, vatom1, vatom2);
1784      case Iop_XorV128:
1785         return mkUifUV128(mce, vatom1, vatom2);
1786
1787      default:
1788         ppIROp(op);
1789         VG_(tool_panic)("memcheck:expr2vbits_Binop");
1790   }
1791}
1792
1793
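/* Generate an expression for the V-bits of a unary operation applied
   to 'atom'.  Most width-changing and data-steering ops just apply the
   same op to the shadow value; Not and the reinterpret ops leave the
   V-bits unchanged; scalar FP ops are handled pessimistically by
   PCasting the operand's V-bits to the result size. */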
1794static
1795IRExpr* expr2vbits_Unop ( MCEnv* mce, IROp op, IRAtom* atom )
1796{
1797   IRAtom* vatom = expr2vbits( mce, atom );
1798   tl_assert(isOriginalAtom(mce,atom));
1799   switch (op) {
1800
1801      case Iop_Sqrt64Fx2:
1802         return unary64Fx2(mce, vatom);
1803
1804      case Iop_Sqrt64F0x2:
1805         return unary64F0x2(mce, vatom);
1806
1807      case Iop_Sqrt32Fx4:
1808      case Iop_RSqrt32Fx4:
1809      case Iop_Recip32Fx4:
1810         return unary32Fx4(mce, vatom);
1811
1812      case Iop_Sqrt32F0x4:
1813      case Iop_RSqrt32F0x4:
1814      case Iop_Recip32F0x4:
1815         return unary32F0x4(mce, vatom);
1816
1817      case Iop_32UtoV128:
1818      case Iop_64UtoV128:
1819         return assignNew(mce, Ity_V128, unop(op, vatom));
1820
1821      case Iop_F32toF64:
1822      case Iop_I32toF64:
1823      case Iop_NegF64:
1824      case Iop_SinF64:
1825      case Iop_CosF64:
1826      case Iop_TanF64:
1827      case Iop_SqrtF64:
1828      case Iop_AbsF64:
1829      case Iop_2xm1F64:
1830         return mkPCastTo(mce, Ity_I64, vatom);
1831
1832      case Iop_Clz32:
1833      case Iop_Ctz32:
1834         return mkPCastTo(mce, Ity_I32, vatom);
1835
1836      case Iop_1Uto64:
1837      case Iop_8Uto64:
1838      case Iop_8Sto64:
1839      case Iop_16Uto64:
1840      case Iop_16Sto64:
1841      case Iop_32Sto64:
1842      case Iop_32Uto64:
1843      case Iop_V128to64:
1844      case Iop_V128HIto64:
1845      case Iop_128HIto64:
1846      case Iop_128to64:
1847         return assignNew(mce, Ity_I64, unop(op, vatom));
1848
1849      case Iop_64to32:
1850      case Iop_64HIto32:
1851      case Iop_1Uto32:
1852      case Iop_8Uto32:
1853      case Iop_16Uto32:
1854      case Iop_16Sto32:
1855      case Iop_8Sto32:
1856         return assignNew(mce, Ity_I32, unop(op, vatom));
1857
1858      case Iop_8Sto16:
1859      case Iop_8Uto16:
1860      case Iop_32to16:
1861      case Iop_32HIto16:
1862      case Iop_64to16:
1863         return assignNew(mce, Ity_I16, unop(op, vatom));
1864
1865      case Iop_1Uto8:
1866      case Iop_16to8:
1867      case Iop_32to8:
1868      case Iop_64to8:
1869         return assignNew(mce, Ity_I8, unop(op, vatom));
1870
1871      case Iop_32to1:
1872         return assignNew(mce, Ity_I1, unop(Iop_32to1, vatom));
1873
1874      case Iop_64to1:
1875         return assignNew(mce, Ity_I1, unop(Iop_64to1, vatom));
1876
1877      case Iop_ReinterpF64asI64:
1878      case Iop_ReinterpI64asF64:
1879      case Iop_ReinterpI32asF32:
1880      case Iop_NotV128:
1881      case Iop_Not64:
1882      case Iop_Not32:
1883      case Iop_Not16:
1884      case Iop_Not8:
1885      case Iop_Not1:
1886         return vatom;
1887
1888      /* Neg* really fall under the Add/Sub banner, and as such you
1889         might think they would qualify for the 'expensive add/sub'
1890         treatment.  However, since the implied literal here is zero
1891         (0 - arg), we just do the cheap thing anyway. */
1892      case Iop_Neg8:
1893         return mkLeft8(mce, vatom);
1894      case Iop_Neg16:
1895         return mkLeft16(mce, vatom);
1896      case Iop_Neg32:
1897         return mkLeft32(mce, vatom);
1898
1899      default:
1900         ppIROp(op);
1901         VG_(tool_panic)("memcheck:expr2vbits_Unop");
1902   }
1903}
1904
1905
1906/* Worker function; do not call directly. */
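/* Compute an expression for the V-bits of an 8/16/32/64-bit
   little-endian load from (addr + bias), by calling the appropriate
   LOADV helper on the (possibly bias-adjusted) address and parking the
   returned V-bits in a fresh shadow temporary. */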
1907static
1908IRAtom* expr2vbits_LDle_WRK ( MCEnv* mce, IRType ty, IRAtom* addr, UInt bias )
1909{
1910   void*    helper;
1911   Char*    hname;
1912   IRDirty* di;
1913   IRTemp   datavbits;
1914   IRAtom*  addrAct;
1915
1916   tl_assert(isOriginalAtom(mce,addr));
1917
1918   /* First, emit a definedness test for the address.  This also sets
1919      the address (shadow) to 'defined' following the test. */
1920   complainIfUndefined( mce, addr );
1921
1922   /* Now cook up a call to the relevant helper function, to read the
1923      data V bits from shadow memory. */
1924   ty = shadowType(ty);
1925   switch (ty) {
1926      case Ity_I64: helper = &MC_(helperc_LOADV8);
1927                    hname = "MC_(helperc_LOADV8)";
1928                    break;
1929      case Ity_I32: helper = &MC_(helperc_LOADV4);
1930                    hname = "MC_(helperc_LOADV4)";
1931                    break;
1932      case Ity_I16: helper = &MC_(helperc_LOADV2);
1933                    hname = "MC_(helperc_LOADV2)";
1934                    break;
1935      case Ity_I8:  helper = &MC_(helperc_LOADV1);
1936                    hname = "MC_(helperc_LOADV1)";
1937                    break;
1938      default:      ppIRType(ty);
1939                    VG_(tool_panic)("memcheck:do_shadow_LDle");
1940   }
1941
1942   /* Generate the actual address into addrAct. */
1943   if (bias == 0) {
1944      addrAct = addr;
1945   } else {
1946      IROp    mkAdd;
1947      IRAtom* eBias;
1948      IRType  tyAddr  = mce->hWordTy;
1949      tl_assert( tyAddr == Ity_I32 || tyAddr == Ity_I64 );
1950      mkAdd   = tyAddr==Ity_I32 ? Iop_Add32 : Iop_Add64;
1951      eBias   = tyAddr==Ity_I32 ? mkU32(bias) : mkU64(bias);
1952      addrAct = assignNew(mce, tyAddr, binop(mkAdd, addr, eBias) );
1953   }
1954
1955   /* We need to have a place to park the V bits we're just about to
1956      read. */
1957   datavbits = newIRTemp(mce->bb->tyenv, ty);
1958   di = unsafeIRDirty_1_N( datavbits,
1959                           1/*regparms*/, hname, helper,
1960                           mkIRExprVec_1( addrAct ));
1961   setHelperAnns( mce, di );
1962   stmt( mce->bb, IRStmt_Dirty(di) );
1963
1964   return mkexpr(datavbits);
1965}
1966
1967
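/* As above, but also handles V128 loads, which are done as two 64-bit
   shadow loads (at bias and bias+8) glued back together with
   Iop_64HLtoV128. */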
1968static
1969IRAtom* expr2vbits_LDle ( MCEnv* mce, IRType ty, IRAtom* addr, UInt bias )
1970{
1971   IRAtom *v64hi, *v64lo;
1972   switch (shadowType(ty)) {
1973      case Ity_I8:
1974      case Ity_I16:
1975      case Ity_I32:
1976      case Ity_I64:
1977         return expr2vbits_LDle_WRK(mce, ty, addr, bias);
1978      case Ity_V128:
1979         v64lo = expr2vbits_LDle_WRK(mce, Ity_I64, addr, bias);
1980         v64hi = expr2vbits_LDle_WRK(mce, Ity_I64, addr, bias+8);
1981         return assignNew( mce,
1982                           Ity_V128,
1983                           binop(Iop_64HLtoV128, v64hi, v64lo));
1984      default:
1985         VG_(tool_panic)("expr2vbits_LDle");
1986   }
1987}
1988
1989
1990static
1991IRAtom* expr2vbits_Mux0X ( MCEnv* mce,
1992                           IRAtom* cond, IRAtom* expr0, IRAtom* exprX )
1993{
1994   IRAtom *vbitsC, *vbits0, *vbitsX;
1995   IRType ty;
1996   /* Given Mux0X(cond,expr0,exprX), generate
1997         Mux0X(cond,expr0#,exprX#) `UifU` PCast(cond#)
1998      That is, steer the V bits like the originals, but trash the
1999      result if the steering value is undefined.  This gives
2000      lazy propagation. */
2001   tl_assert(isOriginalAtom(mce, cond));
2002   tl_assert(isOriginalAtom(mce, expr0));
2003   tl_assert(isOriginalAtom(mce, exprX));
2004
2005   vbitsC = expr2vbits(mce, cond);
2006   vbits0 = expr2vbits(mce, expr0);
2007   vbitsX = expr2vbits(mce, exprX);
2008   ty = typeOfIRExpr(mce->bb->tyenv, vbits0);
2009
2010   return
2011      mkUifU(mce, ty, assignNew(mce, ty, IRExpr_Mux0X(cond, vbits0, vbitsX)),
2012                      mkPCastTo(mce, ty, vbitsC) );
2013}
2014
2015/* --------- This is the main expression-handling function. --------- */
2016
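/* Compute an expression giving the V-bits (shadow value) of 'e', by
   dispatching on the expression tag.  Constants are treated as fully
   defined; everything else is delegated to the handlers above. */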
2017static
2018IRExpr* expr2vbits ( MCEnv* mce, IRExpr* e )
2019{
2020   switch (e->tag) {
2021
2022      case Iex_Get:
2023         return shadow_GET( mce, e->Iex.Get.offset, e->Iex.Get.ty );
2024
2025      case Iex_GetI:
2026         return shadow_GETI( mce, e->Iex.GetI.descr,
2027                                  e->Iex.GetI.ix, e->Iex.GetI.bias );
2028
2029      case Iex_Tmp:
2030         return IRExpr_Tmp( findShadowTmp(mce, e->Iex.Tmp.tmp) );
2031
2032      case Iex_Const:
2033         return definedOfType(shadowType(typeOfIRExpr(mce->bb->tyenv, e)));
2034
2035      case Iex_Binop:
2036         return expr2vbits_Binop(
2037                   mce,
2038                   e->Iex.Binop.op,
2039                   e->Iex.Binop.arg1, e->Iex.Binop.arg2
2040                );
2041
2042      case Iex_Unop:
2043         return expr2vbits_Unop( mce, e->Iex.Unop.op, e->Iex.Unop.arg );
2044
2045      case Iex_LDle:
2046         return expr2vbits_LDle( mce, e->Iex.LDle.ty,
2047                                      e->Iex.LDle.addr, 0/*addr bias*/ );
2048
2049      case Iex_CCall:
2050         return mkLazyN( mce, e->Iex.CCall.args,
2051                              e->Iex.CCall.retty,
2052                              e->Iex.CCall.cee );
2053
2054      case Iex_Mux0X:
2055         return expr2vbits_Mux0X( mce, e->Iex.Mux0X.cond, e->Iex.Mux0X.expr0,
2056                                       e->Iex.Mux0X.exprX);
2057
2058      default:
2059         VG_(printf)("\n");
2060         ppIRExpr(e);
2061         VG_(printf)("\n");
2062         VG_(tool_panic)("memcheck: expr2vbits");
2063   }
2064}
2065
2066/*------------------------------------------------------------*/
2067/*--- Generate shadow stmts from all kinds of IRStmts.     ---*/
2068/*------------------------------------------------------------*/
2069
2070/* Widen a value to the host word size. */
2071
2072static
2073IRExpr* zwidenToHostWord ( MCEnv* mce, IRAtom* vatom )
2074{
2075   IRType ty, tyH;
2076
2077   /* vatom is vbits-value and as such can only have a shadow type. */
2078   tl_assert(isShadowAtom(mce,vatom));
2079
2080   ty  = typeOfIRExpr(mce->bb->tyenv, vatom);
2081   tyH = mce->hWordTy;
2082
2083   if (tyH == Ity_I32) {
2084      switch (ty) {
2085         case Ity_I32: return vatom;
2086         case Ity_I16: return assignNew(mce, tyH, unop(Iop_16Uto32, vatom));
2087         case Ity_I8:  return assignNew(mce, tyH, unop(Iop_8Uto32, vatom));
2088         default:      goto unhandled;
2089      }
2090   } else
2091   if (tyH == Ity_I64) {
2092      switch (ty) {
2093         case Ity_I32: return assignNew(mce, tyH, unop(Iop_32Uto64, vatom));
2094         case Ity_I16: return assignNew(mce, tyH, unop(Iop_32Uto64,
2095                              assignNew(mce, Ity_I32, unop(Iop_16Uto32, vatom))));
2096         case Ity_I8:  return assignNew(mce, tyH, unop(Iop_32Uto64,
2097                              assignNew(mce, Ity_I32, unop(Iop_8Uto32, vatom))));
2098         default:      goto unhandled;
2099      }
2100   } else {
2101      goto unhandled;
2102   }
2103  unhandled:
2104   VG_(printf)("\nty = "); ppIRType(ty); VG_(printf)("\n");
2105   VG_(tool_panic)("zwidenToHostWord");
2106}
2107
2108
2109/* Generate a shadow store.  addr is always the original address atom.
2110   Pass the data as exactly one of 'data' (an original atom) or 'vdata'
2111   (its V-bits); the other must be NULL.  */
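/* V128 stores are split into two 64-bit STOREV8 calls, at byte
   offsets bias and bias+8; all other sizes make a single call to the
   appropriately-sized STOREV helper. */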
2112
2113static
2114void do_shadow_STle ( MCEnv* mce,
2115                      IRAtom* addr, UInt bias,
2116                      IRAtom* data, IRAtom* vdata )
2117{
2118   IROp     mkAdd;
2119   IRType   ty, tyAddr;
2120   IRDirty  *di, *diLo64, *diHi64;
2121   IRAtom   *addrAct, *addrLo64, *addrHi64;
2122   IRAtom   *vdataLo64, *vdataHi64;
2123   IRAtom   *eBias, *eBias0, *eBias8;
2124   void*    helper = NULL;
2125   Char*    hname = NULL;
2126
2127   tyAddr = mce->hWordTy;
2128   mkAdd  = tyAddr==Ity_I32 ? Iop_Add32 : Iop_Add64;
2129   tl_assert( tyAddr == Ity_I32 || tyAddr == Ity_I64 );
2130
2131   di = diLo64 = diHi64 = NULL;
2132   eBias = eBias0 = eBias8 = NULL;
2133   addrAct = addrLo64 = addrHi64 = NULL;
2134   vdataLo64 = vdataHi64 = NULL;
2135
2136   if (data) {
2137      tl_assert(!vdata);
2138      tl_assert(isOriginalAtom(mce, data));
2139      tl_assert(bias == 0);
2140      vdata = expr2vbits( mce, data );
2141   } else {
2142      tl_assert(vdata);
2143   }
2144
2145   tl_assert(isOriginalAtom(mce,addr));
2146   tl_assert(isShadowAtom(mce,vdata));
2147
2148   ty = typeOfIRExpr(mce->bb->tyenv, vdata);
2149
2150   /* First, emit a definedness test for the address.  This also sets
2151      the address (shadow) to 'defined' following the test. */
2152   complainIfUndefined( mce, addr );
2153
2154   /* Now decide which helper function to call to write the data V
2155      bits into shadow memory. */
2156   switch (ty) {
2157      case Ity_V128: /* we'll use the helper twice */
2158      case Ity_I64: helper = &MC_(helperc_STOREV8);
2159                    hname = "MC_(helperc_STOREV8)";
2160                    break;
2161      case Ity_I32: helper = &MC_(helperc_STOREV4);
2162                    hname = "MC_(helperc_STOREV4)";
2163                    break;
2164      case Ity_I16: helper = &MC_(helperc_STOREV2);
2165                    hname = "MC_(helperc_STOREV2)";
2166                    break;
2167      case Ity_I8:  helper = &MC_(helperc_STOREV1);
2168                    hname = "MC_(helperc_STOREV1)";
2169                    break;
2170      default:      VG_(tool_panic)("memcheck:do_shadow_STle");
2171   }
2172
2173   if (ty == Ity_V128) {
2174
2175      /* V128-bit case */
2176      /* See comment in next clause re 64-bit regparms */
2177      eBias0    = tyAddr==Ity_I32 ? mkU32(bias)   : mkU64(bias);
2178      addrLo64  = assignNew(mce, tyAddr, binop(mkAdd, addr, eBias0) );
2179      vdataLo64 = assignNew(mce, Ity_I64, unop(Iop_V128to64, vdata));
2180      diLo64    = unsafeIRDirty_0_N(
2181                     1/*regparms*/, hname, helper,
2182                     mkIRExprVec_2( addrLo64, vdataLo64 ));
2183
2184      eBias8    = tyAddr==Ity_I32 ? mkU32(bias+8) : mkU64(bias+8);
2185      addrHi64  = assignNew(mce, tyAddr, binop(mkAdd, addr, eBias8) );
2186      vdataHi64 = assignNew(mce, Ity_I64, unop(Iop_V128HIto64, vdata));
2187      diHi64    = unsafeIRDirty_0_N(
2188                     1/*regparms*/, hname, helper,
2189                     mkIRExprVec_2( addrHi64, vdataHi64 ));
2190
2191      setHelperAnns( mce, diLo64 );
2192      setHelperAnns( mce, diHi64 );
2193      stmt( mce->bb, IRStmt_Dirty(diLo64) );
2194      stmt( mce->bb, IRStmt_Dirty(diHi64) );
2195
2196   } else {
2197
2198      /* 8/16/32/64-bit cases */
2199      /* Generate the actual address into addrAct. */
2200      if (bias == 0) {
2201         addrAct = addr;
2202      } else {
2203         eBias   = tyAddr==Ity_I32 ? mkU32(bias) : mkU64(bias);
2204         addrAct = assignNew(mce, tyAddr, binop(mkAdd, addr, eBias) );
2205      }
2206
2207      if (ty == Ity_I64) {
2208         /* We can't do this with regparm 2 on 32-bit platforms, since
2209            the back ends aren't clever enough to handle 64-bit
2210            regparm args.  Therefore use just one regparm here. */
2211         di = unsafeIRDirty_0_N(
2212                 1/*regparms*/, hname, helper,
2213                 mkIRExprVec_2( addrAct, vdata ));
2214      } else {
2215         di = unsafeIRDirty_0_N(
2216                 2/*regparms*/, hname, helper,
2217                 mkIRExprVec_2( addrAct,
2218                                zwidenToHostWord( mce, vdata )));
2219      }
2220      setHelperAnns( mce, di );
2221      stmt( mce->bb, IRStmt_Dirty(di) );
2222   }
2223
2224}
2225
2226
2227/* Do lazy pessimistic propagation through a dirty helper call, by
2228   looking at the annotations on it.  This is the most complex part of
2229   Memcheck. */
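/* The scheme: gather the V-bits of everything the call reads -- its
   unmasked arguments, any guest state it reads or modifies, and any
   memory it reads or modifies -- PCast each contribution to Ity_I32
   and UifU them all into a single summary value 'curr'.  Then PCast
   'curr' out to every destination the call writes: the return
   temporary, written guest state, and written memory. */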
2230
2231static IRType szToITy ( Int n )
2232{
2233   switch (n) {
2234      case 1: return Ity_I8;
2235      case 2: return Ity_I16;
2236      case 4: return Ity_I32;
2237      case 8: return Ity_I64;
2238      default: VG_(tool_panic)("szToITy(memcheck)");
2239   }
2240}
2241
2242static
2243void do_shadow_Dirty ( MCEnv* mce, IRDirty* d )
2244{
2245   Int     i, n, offset, toDo, gSz, gOff;
2246   IRAtom  *src, *here, *curr;
2247   IRType  tyAddr, tySrc, tyDst;
2248   IRTemp  dst;
2249
2250   /* First check the guard. */
2251   complainIfUndefined(mce, d->guard);
2252
2253   /* Now round up all inputs and PCast over them. */
2254   curr = definedOfType(Ity_I32);
2255
2256   /* Inputs: unmasked args */
2257   for (i = 0; d->args[i]; i++) {
2258      if (d->cee->mcx_mask & (1<<i)) {
2259         /* ignore this arg */
2260      } else {
2261         here = mkPCastTo( mce, Ity_I32, expr2vbits(mce, d->args[i]) );
2262         curr = mkUifU32(mce, here, curr);
2263      }
2264   }
2265
2266   /* Inputs: guest state that we read. */
2267   for (i = 0; i < d->nFxState; i++) {
2268      tl_assert(d->fxState[i].fx != Ifx_None);
2269      if (d->fxState[i].fx == Ifx_Write)
2270         continue;
2271
2272      /* Ignore any sections marked as 'always defined'. */
2273      if (isAlwaysDefd(mce, d->fxState[i].offset, d->fxState[i].size )) {
2274         if (0)
2275            VG_(printf)("memcheck: Dirty gst: ignored off %d, sz %d\n",
2276                        d->fxState[i].offset, d->fxState[i].size );
2277         continue;
2278      }
2279
2280      /* This state element is read or modified.  So we need to
2281         consider it.  If larger than 8 bytes, deal with it in 8-byte
2282         chunks. */
2283      gSz  = d->fxState[i].size;
2284      gOff = d->fxState[i].offset;
2285      tl_assert(gSz > 0);
2286      while (True) {
2287         if (gSz == 0) break;
2288         n = gSz <= 8 ? gSz : 8;
2289         /* update 'curr' with UifU of the state slice
2290            gOff .. gOff+n-1 */
2291         tySrc = szToITy( n );
2292         src   = assignNew( mce, tySrc,
2293                            shadow_GET(mce, gOff, tySrc ) );
2294         here = mkPCastTo( mce, Ity_I32, src );
2295         curr = mkUifU32(mce, here, curr);
2296         gSz -= n;
2297         gOff += n;
2298      }
2299
2300   }
2301
2302   /* Inputs: memory.  First set up some info needed regardless of
2303      whether we're doing reads or writes. */
2304   tyAddr = Ity_INVALID;
2305
2306   if (d->mFx != Ifx_None) {
2307      /* Because we may do multiple shadow loads/stores from the same
2308         base address, it's best to do a single test of its
2309         definedness right now.  Post-instrumentation optimisation
2310         should remove all but this test. */
2311      tl_assert(d->mAddr);
2312      complainIfUndefined(mce, d->mAddr);
2313
2314      tyAddr = typeOfIRExpr(mce->bb->tyenv, d->mAddr);
2315      tl_assert(tyAddr == Ity_I32 || tyAddr == Ity_I64);
2316      tl_assert(tyAddr == mce->hWordTy); /* not really right */
2317   }
2318
2319   /* Deal with memory inputs (reads or modifies) */
2320   if (d->mFx == Ifx_Read || d->mFx == Ifx_Modify) {
2321      offset = 0;
2322      toDo   = d->mSize;
2323      /* chew off 32-bit chunks */
2324      while (toDo >= 4) {
2325         here = mkPCastTo(
2326                   mce, Ity_I32,
2327                   expr2vbits_LDle ( mce, Ity_I32,
2328                                     d->mAddr, d->mSize - toDo )
2329                );
2330         curr = mkUifU32(mce, here, curr);
2331         toDo -= 4;
2332      }
2333      /* chew off 16-bit chunks */
2334      while (toDo >= 2) {
2335         here = mkPCastTo(
2336                   mce, Ity_I32,
2337                   expr2vbits_LDle ( mce, Ity_I16,
2338                                     d->mAddr, d->mSize - toDo )
2339                );
2340         curr = mkUifU32(mce, here, curr);
2341         toDo -= 2;
2342      }
2343      tl_assert(toDo == 0); /* also need to handle 1-byte excess */
2344   }
2345
2346   /* Whew!  So curr is a 32-bit V-value summarising pessimistically
2347      all the inputs to the helper.  Now we need to re-distribute the
2348      results to all destinations. */
2349
2350   /* Outputs: the destination temporary, if there is one. */
2351   if (d->tmp != IRTemp_INVALID) {
2352      dst   = findShadowTmp(mce, d->tmp);
2353      tyDst = typeOfIRTemp(mce->bb->tyenv, d->tmp);
2354      assign( mce->bb, dst, mkPCastTo( mce, tyDst, curr) );
2355   }
2356
2357   /* Outputs: guest state that we write or modify. */
2358   for (i = 0; i < d->nFxState; i++) {
2359      tl_assert(d->fxState[i].fx != Ifx_None);
2360      if (d->fxState[i].fx == Ifx_Read)
2361         continue;
2362      /* Ignore any sections marked as 'always defined'. */
2363      if (isAlwaysDefd(mce, d->fxState[i].offset, d->fxState[i].size ))
2364         continue;
2365      /* This state element is written or modified.  So we need to
2366         consider it.  If larger than 8 bytes, deal with it in 8-byte
2367         chunks. */
2368      gSz  = d->fxState[i].size;
2369      gOff = d->fxState[i].offset;
2370      tl_assert(gSz > 0);
2371      while (True) {
2372         if (gSz == 0) break;
2373         n = gSz <= 8 ? gSz : 8;
2374         /* Write suitably-casted 'curr' to the state slice
2375            gOff .. gOff+n-1 */
2376         tyDst = szToITy( n );
2377         do_shadow_PUT( mce, gOff,
2378                             NULL, /* original atom */
2379                             mkPCastTo( mce, tyDst, curr ) );
2380         gSz -= n;
2381         gOff += n;
2382      }
2383   }
2384
2385   /* Outputs: memory that we write or modify. */
2386   if (d->mFx == Ifx_Write || d->mFx == Ifx_Modify) {
2387      offset = 0;
2388      toDo   = d->mSize;
2389      /* chew off 32-bit chunks */
2390      while (toDo >= 4) {
2391         do_shadow_STle( mce, d->mAddr, d->mSize - toDo,
2392                         NULL, /* original data */
2393                         mkPCastTo( mce, Ity_I32, curr ) );
2394         toDo -= 4;
2395      }
2396      /* chew off 16-bit chunks */
2397      while (toDo >= 2) {
2398         do_shadow_STle( mce, d->mAddr, d->mSize - toDo,
2399                         NULL, /* original data */
2400                         mkPCastTo( mce, Ity_I16, curr ) );
2401         toDo -= 2;
2402      }
2403      tl_assert(toDo == 0); /* also need to handle 1-byte excess */
2404   }
2405
2406}
2407
2408/* We have an ABI hint telling us that [base .. base+len-1] is to
2409   become undefined ("writable").  Generate code to call a helper to
2410   notify the A/V bit machinery of this fact.
2411
2412   We call
2413   void MC_(helperc_MAKE_STACK_UNINIT) ( Addr base, UWord len );
2414*/
2415static
2416void do_AbiHint ( MCEnv* mce, IRExpr* base, Int len )
2417{
2418   IRDirty* di;
2419   di = unsafeIRDirty_0_N(
2420           0/*regparms*/,
2421           "MC_(helperc_MAKE_STACK_UNINIT)",
2422           &MC_(helperc_MAKE_STACK_UNINIT),
2423           mkIRExprVec_2( base, mkIRExpr_HWord( (UInt)len) )
2424        );
2425   stmt( mce->bb, IRStmt_Dirty(di) );
2426}
2427
2428
2429/*------------------------------------------------------------*/
2430/*--- Memcheck main                                        ---*/
2431/*------------------------------------------------------------*/
2432
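/* Does this atom denote one of a small set of 'magic' constants
   (0xFEFEFEFF, 0x80808080, 0x0101010101010101, and their 64-bit
   relatives)?  Such literals typically come from word-at-a-time
   string/zero-byte-scanning code, which is exactly the kind of code
   that needs the bit-accurate (expensive) Add/Sub/CmpEQ
   instrumentation to avoid false positives. */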
2433static Bool isBogusAtom ( IRAtom* at )
2434{
2435   ULong n = 0;
2436   IRConst* con;
2437   tl_assert(isIRAtom(at));
2438   if (at->tag == Iex_Tmp)
2439      return False;
2440   tl_assert(at->tag == Iex_Const);
2441   con = at->Iex.Const.con;
2442   switch (con->tag) {
2443      case Ico_U1:   return False;
2444      case Ico_U8:   n = (ULong)con->Ico.U8; break;
2445      case Ico_U16:  n = (ULong)con->Ico.U16; break;
2446      case Ico_U32:  n = (ULong)con->Ico.U32; break;
2447      case Ico_U64:  n = (ULong)con->Ico.U64; break;
2448      case Ico_F64:  return False;
2449      case Ico_F64i: return False;
2450      case Ico_V128: return False;
2451      default: ppIRExpr(at); tl_assert(0);
2452   }
2453   /* VG_(printf)("%llx\n", n); */
2454   return (/*32*/    n == 0xFEFEFEFFULL
2455           /*32*/ || n == 0x80808080ULL
2456           /*64*/ || n == 0xFEFEFEFEFEFEFEFFULL
2457           /*64*/ || n == 0x8080808080808080ULL
2458           /*64*/ || n == 0x0101010101010101ULL
2459          );
2460}
2461
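/* Scan one flat IRStmt and return True if any constant atom in it is
   'bogus' in the above sense.  Used below to decide, per BB, whether
   to switch on the expensive instrumentation. */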
2462static Bool checkForBogusLiterals ( /*FLAT*/ IRStmt* st )
2463{
2464   Int      i;
2465   IRExpr*  e;
2466   IRDirty* d;
2467   switch (st->tag) {
2468      case Ist_Tmp:
2469         e = st->Ist.Tmp.data;
2470         switch (e->tag) {
2471            case Iex_Get:
2472            case Iex_Tmp:
2473               return False;
2474            case Iex_Const:
2475               return isBogusAtom(e);
2476            case Iex_Unop:
2477               return isBogusAtom(e->Iex.Unop.arg);
2478            case Iex_GetI:
2479               return isBogusAtom(e->Iex.GetI.ix);
2480            case Iex_Binop:
2481               return isBogusAtom(e->Iex.Binop.arg1)
2482                      || isBogusAtom(e->Iex.Binop.arg2);
2483            case Iex_Mux0X:
2484               return isBogusAtom(e->Iex.Mux0X.cond)
2485                      || isBogusAtom(e->Iex.Mux0X.expr0)
2486                      || isBogusAtom(e->Iex.Mux0X.exprX);
2487            case Iex_LDle:
2488               return isBogusAtom(e->Iex.LDle.addr);
2489            case Iex_CCall:
2490               for (i = 0; e->Iex.CCall.args[i]; i++)
2491                  if (isBogusAtom(e->Iex.CCall.args[i]))
2492                     return True;
2493               return False;
2494            default:
2495               goto unhandled;
2496         }
2497      case Ist_Dirty:
2498         d = st->Ist.Dirty.details;
2499         for (i = 0; d->args[i]; i++)
2500            if (isBogusAtom(d->args[i]))
2501               return True;
2502         if (d->guard && isBogusAtom(d->guard))
2503            return True;
2504         if (d->mAddr && isBogusAtom(d->mAddr))
2505            return True;
2506         return False;
2507      case Ist_Put:
2508         return isBogusAtom(st->Ist.Put.data);
2509      case Ist_PutI:
2510         return isBogusAtom(st->Ist.PutI.ix)
2511                || isBogusAtom(st->Ist.PutI.data);
2512      case Ist_STle:
2513         return isBogusAtom(st->Ist.STle.addr)
2514                || isBogusAtom(st->Ist.STle.data);
2515      case Ist_Exit:
2516         return isBogusAtom(st->Ist.Exit.guard);
2517      case Ist_AbiHint:
2518         return isBogusAtom(st->Ist.AbiHint.base);
2519      case Ist_NoOp:
2520      case Ist_IMark:
2521      case Ist_MFence:
2522         return False;
2523      default:
2524      unhandled:
2525         ppIRStmt(st);
2526         VG_(tool_panic)("hasBogusLiterals");
2527   }
2528}
2529
2530
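/* Top-level instrumentation routine.  Builds a new IRBB, copying the
   type environment and jump target from bb_in, then walks the original
   statements in order: for each one it first emits the statements that
   compute/update the shadow (V-bit) state, and then copies the
   original statement itself across.  Finally the definedness of the
   BB's jump target is checked. */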
2531IRBB* MC_(instrument) ( IRBB* bb_in, VexGuestLayout* layout,
2532                        IRType gWordTy, IRType hWordTy )
2533{
2534   Bool verboze = False; //True;
2535
2536   Int     i, j, first_stmt;
2537   IRStmt* st;
2538   MCEnv   mce;
2539   IRBB*   bb;
2540
2541   if (gWordTy != hWordTy) {
2542      /* We don't currently support this case. */
2543      VG_(tool_panic)("host/guest word size mismatch");
2544   }
2545
2546   /* Check we're not completely nuts */
2547   tl_assert(sizeof(UWord) == sizeof(void*));
2548   tl_assert(sizeof(Word)  == sizeof(void*));
2549   tl_assert(sizeof(ULong) == 8);
2550   tl_assert(sizeof(Long)  == 8);
2551   tl_assert(sizeof(UInt)  == 4);
2552   tl_assert(sizeof(Int)   == 4);
2553
2554   /* Set up BB */
2555   bb           = emptyIRBB();
2556   bb->tyenv    = dopyIRTypeEnv(bb_in->tyenv);
2557   bb->next     = dopyIRExpr(bb_in->next);
2558   bb->jumpkind = bb_in->jumpkind;
2559
2560   /* Set up the running environment.  Only .bb is modified as we go
2561      along. */
2562   mce.bb             = bb;
2563   mce.layout         = layout;
2564   mce.n_originalTmps = bb->tyenv->types_used;
2565   mce.hWordTy        = hWordTy;
2566   mce.bogusLiterals  = False;
2567   mce.tmpMap         = LibVEX_Alloc(mce.n_originalTmps * sizeof(IRTemp));
2568   for (i = 0; i < mce.n_originalTmps; i++)
2569      mce.tmpMap[i] = IRTemp_INVALID;
2570
2571   /* Iterate over the stmts. */
2572
2573   for (i = 0; i < bb_in->stmts_used; i++) {
2574      st = bb_in->stmts[i];
2575      tl_assert(st);
2576
2577      tl_assert(isFlatIRStmt(st));
2578
2579      if (!mce.bogusLiterals) {
2580         mce.bogusLiterals = checkForBogusLiterals(st);
2581         if (0 && mce.bogusLiterals) {
2582            VG_(printf)("bogus: ");
2583            ppIRStmt(st);
2584            VG_(printf)("\n");
2585         }
2586      }
2587
2588      first_stmt = bb->stmts_used;
2589
2590      if (verboze) {
2591         ppIRStmt(st);
2592         VG_(printf)("\n\n");
2593      }
2594
2595      /* Generate instrumentation code for each stmt ... */
2596
2597      switch (st->tag) {
2598
2599         case Ist_Tmp:
2600            assign( bb, findShadowTmp(&mce, st->Ist.Tmp.tmp),
2601                        expr2vbits( &mce, st->Ist.Tmp.data) );
2602            break;
2603
2604         case Ist_Put:
2605            do_shadow_PUT( &mce,
2606                           st->Ist.Put.offset,
2607                           st->Ist.Put.data,
2608                           NULL /* shadow atom */ );
2609            break;
2610
2611         case Ist_PutI:
2612            do_shadow_PUTI( &mce,
2613                            st->Ist.PutI.descr,
2614                            st->Ist.PutI.ix,
2615                            st->Ist.PutI.bias,
2616                            st->Ist.PutI.data );
2617            break;
2618
2619         case Ist_STle:
2620            do_shadow_STle( &mce, st->Ist.STle.addr, 0/* addr bias */,
2621                                  st->Ist.STle.data,
2622                                  NULL /* shadow data */ );
2623            break;
2624
2625         case Ist_Exit:
2626            complainIfUndefined( &mce, st->Ist.Exit.guard );
2627            break;
2628
2629         case Ist_NoOp:
2630         case Ist_IMark:
2631         case Ist_MFence:
2632            break;
2633
2634         case Ist_Dirty:
2635            do_shadow_Dirty( &mce, st->Ist.Dirty.details );
2636            break;
2637
2638         case Ist_AbiHint:
2639            do_AbiHint( &mce, st->Ist.AbiHint.base, st->Ist.AbiHint.len );
2640            break;
2641
2642         default:
2643            VG_(printf)("\n");
2644            ppIRStmt(st);
2645            VG_(printf)("\n");
2646            VG_(tool_panic)("memcheck: unhandled IRStmt");
2647
2648      } /* switch (st->tag) */
2649
2650      if (verboze) {
2651         for (j = first_stmt; j < bb->stmts_used; j++) {
2652            VG_(printf)("   ");
2653            ppIRStmt(bb->stmts[j]);
2654            VG_(printf)("\n");
2655         }
2656         VG_(printf)("\n");
2657      }
2658
2659      /* ... and finally copy the stmt itself to the output. */
2660      addStmtToIRBB(bb, st);
2661
2662   }
2663
2664   /* Now we need to complain if the jump target is undefined. */
2665   first_stmt = bb->stmts_used;
2666
2667   if (verboze) {
2668      VG_(printf)("bb->next = ");
2669      ppIRExpr(bb->next);
2670      VG_(printf)("\n\n");
2671   }
2672
2673   complainIfUndefined( &mce, bb->next );
2674
2675   if (verboze) {
2676      for (j = first_stmt; j < bb->stmts_used; j++) {
2677         VG_(printf)("   ");
2678         ppIRStmt(bb->stmts[j]);
2679         VG_(printf)("\n");
2680      }
2681      VG_(printf)("\n");
2682   }
2683
2684   return bb;
2685}
2686
2687/*--------------------------------------------------------------------*/
2688/*--- end                                           mc_translate.c ---*/
2689/*--------------------------------------------------------------------*/
2690