mc_translate.c revision 36a20fa5f779a0a6fb7b4a90dcaa6376481f1faa
/*--------------------------------------------------------------------*/
/*--- Instrument IR to perform memory checking operations.         ---*/
/*---                                               mc_translate.c ---*/
/*--------------------------------------------------------------------*/

/*
   This file is part of MemCheck, a heavyweight Valgrind tool for
   detecting memory errors.

   Copyright (C) 2000-2005 Julian Seward
      jseward@acm.org

   This program is free software; you can redistribute it and/or
   modify it under the terms of the GNU General Public License as
   published by the Free Software Foundation; either version 2 of the
   License, or (at your option) any later version.

   This program is distributed in the hope that it will be useful, but
   WITHOUT ANY WARRANTY; without even the implied warranty of
   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
   General Public License for more details.

   You should have received a copy of the GNU General Public License
   along with this program; if not, write to the Free Software
   Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA
   02111-1307, USA.

   The GNU General Public License is contained in the file COPYING.
*/

#include "mc_include.h"
#include "pub_tool_libcprint.h"


/*------------------------------------------------------------*/
/*--- Forward decls                                         ---*/
/*------------------------------------------------------------*/

struct _MCEnv;

static IRType  shadowType ( IRType ty );
static IRExpr* expr2vbits ( struct _MCEnv* mce, IRExpr* e );


/*------------------------------------------------------------*/
/*--- Memcheck running state, and tmp management.           ---*/
/*------------------------------------------------------------*/

/* Carries around state during memcheck instrumentation. */
typedef
   struct _MCEnv {
      /* MODIFIED: the bb being constructed.  IRStmts are added. */
      IRBB* bb;

      /* MODIFIED: a table [0 .. #temps_in_original_bb-1] which maps
         original temps to their current shadow temp.  Initially all
         entries are IRTemp_INVALID.  Entries are added lazily since
         many original temps are not used due to optimisation prior
         to instrumentation.  Note that floating point original tmps
         are shadowed by integer tmps of the same size, and Bit-typed
         original tmps are shadowed by the type Ity_I8.  See comment
         below. */
      IRTemp* tmpMap;
      Int     n_originalTmps; /* for range checking */

      /* MODIFIED: indicates whether "bogus" literals have so far been
         found.  Starts off False, and may change to True. */
      Bool    bogusLiterals;

      /* READONLY: the guest layout.  This indicates which parts of
         the guest state should be regarded as 'always defined'. */
      VexGuestLayout* layout;
      /* READONLY: the host word type.  Needed for constructing
         arguments of type 'HWord' to be passed to helper functions.
         Ity_I32 or Ity_I64 only. */
      IRType hWordTy;
   }
   MCEnv;

/* SHADOW TMP MANAGEMENT.  Shadow tmps are allocated lazily (on
   demand), as they are encountered.  This is for two reasons.

   (1) (less important reason): Many original tmps are unused due to
   initial IR optimisation, and we do not want to waste space in
   tables tracking them.

   Shadow IRTemps are therefore allocated on demand.  mce.tmpMap is a
   table indexed [0 .. n_originalTmps-1], which gives the current
   shadow for each original tmp, or IRTemp_INVALID if none is so far
   assigned.
   It is necessary to support making multiple assignments to a shadow
   -- specifically, after testing a shadow for definedness, it needs
   to be made defined.  But IR's SSA property disallows this.

   (2) (more important reason): Therefore, when a shadow needs to get
   a new value, a new temporary is created, the value is assigned to
   that, and the tmpMap is updated to reflect the new binding.

   A corollary is that if the tmpMap maps a given tmp to
   IRTemp_INVALID and we are hoping to read that shadow tmp, it means
   there's a read-before-write error in the original tmps.  The IR
   sanity checker should catch all such anomalies, however.
*/

/* Find the tmp currently shadowing the given original tmp.  If none
   so far exists, allocate one.  */
static IRTemp findShadowTmp ( MCEnv* mce, IRTemp orig )
{
   tl_assert(orig < mce->n_originalTmps);
   if (mce->tmpMap[orig] == IRTemp_INVALID) {
      mce->tmpMap[orig]
         = newIRTemp(mce->bb->tyenv,
                     shadowType(mce->bb->tyenv->types[orig]));
   }
   return mce->tmpMap[orig];
}

/* Allocate a new shadow for the given original tmp.  This means any
   previous shadow is abandoned.  This is needed because it is
   necessary to give a new value to a shadow once it has been tested
   for undefinedness, but unfortunately IR's SSA property disallows
   this.  Instead we must abandon the old shadow, allocate a new one
   and use that instead. */
static void newShadowTmp ( MCEnv* mce, IRTemp orig )
{
   tl_assert(orig < mce->n_originalTmps);
   mce->tmpMap[orig]
      = newIRTemp(mce->bb->tyenv,
                  shadowType(mce->bb->tyenv->types[orig]));
}

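/* Illustration (editor's sketch, not from the original source):
   suppose original tmp t5 is currently shadowed by t50, and t50 must
   now be forced to 'defined'.  Rather than assigning to t50 again,
   which would break SSA, the instrumenter does

      newShadowTmp(mce, t5);                   // rebind t5 to a fresh
                                               // shadow, say t51
      assign(mce->bb, findShadowTmp(mce, t5),  // i.e. t51
             definedOfType(ty));

   and subsequent reads of t5's shadow see t51.  The tmp names here
   are hypothetical; this is exactly the sequence used by
   complainIfUndefined() below. */
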

/*------------------------------------------------------------*/
/*--- IRAtoms -- a subset of IRExprs                        ---*/
/*------------------------------------------------------------*/

/* An atom is either an IRExpr_Const or an IRExpr_Tmp, as defined by
   isIRAtom() in libvex_ir.h.  Because this instrumenter expects flat
   input, most of this code deals in atoms.  Usefully, a value atom
   always has a V-value which is also an atom: constants are shadowed
   by constants, and temps are shadowed by the corresponding shadow
   temporary. */

typedef IRExpr IRAtom;

/* (used for sanity checks only): is this an atom which looks
   like it's from original code? */
static Bool isOriginalAtom ( MCEnv* mce, IRAtom* a1 )
{
   if (a1->tag == Iex_Const)
      return True;
   if (a1->tag == Iex_Tmp && a1->Iex.Tmp.tmp < mce->n_originalTmps)
      return True;
   return False;
}

/* (used for sanity checks only): is this an atom which looks
   like it's from shadow code? */
static Bool isShadowAtom ( MCEnv* mce, IRAtom* a1 )
{
   if (a1->tag == Iex_Const)
      return True;
   if (a1->tag == Iex_Tmp && a1->Iex.Tmp.tmp >= mce->n_originalTmps)
      return True;
   return False;
}

/* (used for sanity checks only): check that both args are atoms and
   are identically-kinded. */
static Bool sameKindedAtoms ( IRAtom* a1, IRAtom* a2 )
{
   if (a1->tag == Iex_Tmp && a2->tag == Iex_Tmp)
      return True;
   if (a1->tag == Iex_Const && a2->tag == Iex_Const)
      return True;
   return False;
}


/*------------------------------------------------------------*/
/*--- Type management                                       ---*/
/*------------------------------------------------------------*/

/* Shadow state is always accessed using integer types.  This returns
   an integer type with the same size (as per sizeofIRType) as the
   given type.  The only valid shadow types are I1, I8, I16, I32,
   I64, I128 and V128. */

static IRType shadowType ( IRType ty )
{
   switch (ty) {
      case Ity_I1:
      case Ity_I8:
      case Ity_I16:
      case Ity_I32:
      case Ity_I64:
      case Ity_I128: return ty;
      case Ity_F32:  return Ity_I32;
      case Ity_F64:  return Ity_I64;
      case Ity_V128: return Ity_V128;
      default: ppIRType(ty);
               VG_(tool_panic)("memcheck:shadowType");
   }
}

/* Produce a 'defined' value of the given shadow type.  Should only be
   supplied shadow types (I1/I8/I16/I32/I64/V128). */
static IRExpr* definedOfType ( IRType ty ) {
   switch (ty) {
      case Ity_I1:   return IRExpr_Const(IRConst_U1(False));
      case Ity_I8:   return IRExpr_Const(IRConst_U8(0));
      case Ity_I16:  return IRExpr_Const(IRConst_U16(0));
      case Ity_I32:  return IRExpr_Const(IRConst_U32(0));
      case Ity_I64:  return IRExpr_Const(IRConst_U64(0));
      case Ity_V128: return IRExpr_Const(IRConst_V128(0x0000));
      default:       VG_(tool_panic)("memcheck:definedOfType");
   }
}


/*------------------------------------------------------------*/
/*--- Constructing IR fragments                             ---*/
/*------------------------------------------------------------*/

/* assign value to tmp */
#define assign(_bb,_tmp,_expr)   \
   addStmtToIRBB((_bb), IRStmt_Tmp((_tmp),(_expr)))

/* add stmt to a bb */
#define stmt(_bb,_stmt)    \
   addStmtToIRBB((_bb), (_stmt))

/* build various kinds of expressions */
#define binop(_op, _arg1, _arg2) IRExpr_Binop((_op),(_arg1),(_arg2))
#define unop(_op, _arg)          IRExpr_Unop((_op),(_arg))
#define mkU8(_n)                 IRExpr_Const(IRConst_U8(_n))
#define mkU16(_n)                IRExpr_Const(IRConst_U16(_n))
#define mkU32(_n)                IRExpr_Const(IRConst_U32(_n))
#define mkU64(_n)                IRExpr_Const(IRConst_U64(_n))
#define mkV128(_n)               IRExpr_Const(IRConst_V128(_n))
#define mkexpr(_tmp)             IRExpr_Tmp((_tmp))

/* bind the given expression to a new temporary, and return the
   temporary.  This effectively converts an arbitrary expression into
   an atom. */
static IRAtom* assignNew ( MCEnv* mce, IRType ty, IRExpr* e ) {
   IRTemp t = newIRTemp(mce->bb->tyenv, ty);
   assign(mce->bb, t, e);
   return mkexpr(t);
}

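/* Illustration (editor's sketch, not from the original source):
   because the incoming IR is flat, compound shadow expressions must
   be built one operation at a time.  E.g. computing va & ~vb at the
   I32 type takes two statements:

      IRAtom* nb  = assignNew(mce, Ity_I32, unop(Iop_Not32, vb));
      IRAtom* res = assignNew(mce, Ity_I32, binop(Iop_And32, va, nb));

   Each assignNew adds one IRStmt_Tmp to the bb and hands back the
   new tmp as an atom, ready to feed into the next operation.  (va,
   vb, nb, res are hypothetical names.) */
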

/*------------------------------------------------------------*/
/*--- Constructing definedness primitive ops               ---*/
/*------------------------------------------------------------*/

/* --------- Defined-if-either-defined --------- */

static IRAtom* mkDifD8 ( MCEnv* mce, IRAtom* a1, IRAtom* a2 ) {
   tl_assert(isShadowAtom(mce,a1));
   tl_assert(isShadowAtom(mce,a2));
   return assignNew(mce, Ity_I8, binop(Iop_And8, a1, a2));
}

static IRAtom* mkDifD16 ( MCEnv* mce, IRAtom* a1, IRAtom* a2 ) {
   tl_assert(isShadowAtom(mce,a1));
   tl_assert(isShadowAtom(mce,a2));
   return assignNew(mce, Ity_I16, binop(Iop_And16, a1, a2));
}

static IRAtom* mkDifD32 ( MCEnv* mce, IRAtom* a1, IRAtom* a2 ) {
   tl_assert(isShadowAtom(mce,a1));
   tl_assert(isShadowAtom(mce,a2));
   return assignNew(mce, Ity_I32, binop(Iop_And32, a1, a2));
}

static IRAtom* mkDifD64 ( MCEnv* mce, IRAtom* a1, IRAtom* a2 ) {
   tl_assert(isShadowAtom(mce,a1));
   tl_assert(isShadowAtom(mce,a2));
   return assignNew(mce, Ity_I64, binop(Iop_And64, a1, a2));
}

static IRAtom* mkDifDV128 ( MCEnv* mce, IRAtom* a1, IRAtom* a2 ) {
   tl_assert(isShadowAtom(mce,a1));
   tl_assert(isShadowAtom(mce,a2));
   return assignNew(mce, Ity_V128, binop(Iop_AndV128, a1, a2));
}

/* --------- Undefined-if-either-undefined --------- */

static IRAtom* mkUifU8 ( MCEnv* mce, IRAtom* a1, IRAtom* a2 ) {
   tl_assert(isShadowAtom(mce,a1));
   tl_assert(isShadowAtom(mce,a2));
   return assignNew(mce, Ity_I8, binop(Iop_Or8, a1, a2));
}

static IRAtom* mkUifU16 ( MCEnv* mce, IRAtom* a1, IRAtom* a2 ) {
   tl_assert(isShadowAtom(mce,a1));
   tl_assert(isShadowAtom(mce,a2));
   return assignNew(mce, Ity_I16, binop(Iop_Or16, a1, a2));
}

static IRAtom* mkUifU32 ( MCEnv* mce, IRAtom* a1, IRAtom* a2 ) {
   tl_assert(isShadowAtom(mce,a1));
   tl_assert(isShadowAtom(mce,a2));
   return assignNew(mce, Ity_I32, binop(Iop_Or32, a1, a2));
}

static IRAtom* mkUifU64 ( MCEnv* mce, IRAtom* a1, IRAtom* a2 ) {
   tl_assert(isShadowAtom(mce,a1));
   tl_assert(isShadowAtom(mce,a2));
   return assignNew(mce, Ity_I64, binop(Iop_Or64, a1, a2));
}

static IRAtom* mkUifUV128 ( MCEnv* mce, IRAtom* a1, IRAtom* a2 ) {
   tl_assert(isShadowAtom(mce,a1));
   tl_assert(isShadowAtom(mce,a2));
   return assignNew(mce, Ity_V128, binop(Iop_OrV128, a1, a2));
}

static IRAtom* mkUifU ( MCEnv* mce, IRType vty, IRAtom* a1, IRAtom* a2 ) {
   switch (vty) {
      case Ity_I8:   return mkUifU8(mce, a1, a2);
      case Ity_I16:  return mkUifU16(mce, a1, a2);
      case Ity_I32:  return mkUifU32(mce, a1, a2);
      case Ity_I64:  return mkUifU64(mce, a1, a2);
      case Ity_V128: return mkUifUV128(mce, a1, a2);
      default:
         VG_(printf)("\n"); ppIRType(vty); VG_(printf)("\n");
         VG_(tool_panic)("memcheck:mkUifU");
   }
}

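/* Worked example (illustrative, not from the original source):
   V bits are 0 for 'defined' and 1 for 'undefined'.  With shadow
   values a1# = 0b0011 (low two bits undefined) and a2# = 0b0101:

      DifD:  a1# AND a2# = 0b0001   -- defined if either is defined
      UifU:  a1# OR  a2# = 0b0111   -- undefined if either is undefined

   UifU is the default pessimistic merge; DifD is used where one
   defined source suffices, e.g. in the And/Or improvement terms
   below. */
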

/* --------- The Left-family of operations. --------- */

static IRAtom* mkLeft8 ( MCEnv* mce, IRAtom* a1 ) {
   tl_assert(isShadowAtom(mce,a1));
   /* It's safe to duplicate a1 since it's only an atom */
   return assignNew(mce, Ity_I8,
                    binop(Iop_Or8, a1,
                          assignNew(mce, Ity_I8,
                                    unop(Iop_Neg8, a1))));
}

static IRAtom* mkLeft16 ( MCEnv* mce, IRAtom* a1 ) {
   tl_assert(isShadowAtom(mce,a1));
   /* It's safe to duplicate a1 since it's only an atom */
   return assignNew(mce, Ity_I16,
                    binop(Iop_Or16, a1,
                          assignNew(mce, Ity_I16,
                                    unop(Iop_Neg16, a1))));
}

static IRAtom* mkLeft32 ( MCEnv* mce, IRAtom* a1 ) {
   tl_assert(isShadowAtom(mce,a1));
   /* It's safe to duplicate a1 since it's only an atom */
   return assignNew(mce, Ity_I32,
                    binop(Iop_Or32, a1,
                          assignNew(mce, Ity_I32,
                                    unop(Iop_Neg32, a1))));
}

static IRAtom* mkLeft64 ( MCEnv* mce, IRAtom* a1 ) {
   tl_assert(isShadowAtom(mce,a1));
   /* It's safe to duplicate a1 since it's only an atom */
   return assignNew(mce, Ity_I64,
                    binop(Iop_Or64, a1,
                          assignNew(mce, Ity_I64,
                                    unop(Iop_Neg64, a1))));
}

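/* Worked example (illustrative, not from the original source):
   Left(x) computes x | -x, which smears the lowest undefined (1) bit
   leftwards.  For an 8-bit shadow 0b00000100 (only bit 2 undefined):

      Neg8:  -0b00000100             = 0b11111100
      Or8:   0b00000100 | 0b11111100 = 0b11111100

   so bit 2 and all higher bits become undefined.  This models carry
   propagation in add/sub: an undefined bit can corrupt its own and
   all higher positions, but never lower ones. */
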

/* --------- 'Improvement' functions for AND/OR. --------- */

/* ImproveAND(data, vbits) = data OR vbits.  Defined (0) data 0s give
   defined (0); all other -> undefined (1).
*/
static IRAtom* mkImproveAND8 ( MCEnv* mce, IRAtom* data, IRAtom* vbits )
{
   tl_assert(isOriginalAtom(mce, data));
   tl_assert(isShadowAtom(mce, vbits));
   tl_assert(sameKindedAtoms(data, vbits));
   return assignNew(mce, Ity_I8, binop(Iop_Or8, data, vbits));
}

static IRAtom* mkImproveAND16 ( MCEnv* mce, IRAtom* data, IRAtom* vbits )
{
   tl_assert(isOriginalAtom(mce, data));
   tl_assert(isShadowAtom(mce, vbits));
   tl_assert(sameKindedAtoms(data, vbits));
   return assignNew(mce, Ity_I16, binop(Iop_Or16, data, vbits));
}

static IRAtom* mkImproveAND32 ( MCEnv* mce, IRAtom* data, IRAtom* vbits )
{
   tl_assert(isOriginalAtom(mce, data));
   tl_assert(isShadowAtom(mce, vbits));
   tl_assert(sameKindedAtoms(data, vbits));
   return assignNew(mce, Ity_I32, binop(Iop_Or32, data, vbits));
}

static IRAtom* mkImproveAND64 ( MCEnv* mce, IRAtom* data, IRAtom* vbits )
{
   tl_assert(isOriginalAtom(mce, data));
   tl_assert(isShadowAtom(mce, vbits));
   tl_assert(sameKindedAtoms(data, vbits));
   return assignNew(mce, Ity_I64, binop(Iop_Or64, data, vbits));
}

static IRAtom* mkImproveANDV128 ( MCEnv* mce, IRAtom* data, IRAtom* vbits )
{
   tl_assert(isOriginalAtom(mce, data));
   tl_assert(isShadowAtom(mce, vbits));
   tl_assert(sameKindedAtoms(data, vbits));
   return assignNew(mce, Ity_V128, binop(Iop_OrV128, data, vbits));
}

/* ImproveOR(data, vbits) = ~data OR vbits.  Defined (0) data 1s give
   defined (0); all other -> undefined (1).
*/
static IRAtom* mkImproveOR8 ( MCEnv* mce, IRAtom* data, IRAtom* vbits )
{
   tl_assert(isOriginalAtom(mce, data));
   tl_assert(isShadowAtom(mce, vbits));
   tl_assert(sameKindedAtoms(data, vbits));
   return assignNew(
             mce, Ity_I8,
             binop(Iop_Or8,
                   assignNew(mce, Ity_I8, unop(Iop_Not8, data)),
                   vbits) );
}

static IRAtom* mkImproveOR16 ( MCEnv* mce, IRAtom* data, IRAtom* vbits )
{
   tl_assert(isOriginalAtom(mce, data));
   tl_assert(isShadowAtom(mce, vbits));
   tl_assert(sameKindedAtoms(data, vbits));
   return assignNew(
             mce, Ity_I16,
             binop(Iop_Or16,
                   assignNew(mce, Ity_I16, unop(Iop_Not16, data)),
                   vbits) );
}

static IRAtom* mkImproveOR32 ( MCEnv* mce, IRAtom* data, IRAtom* vbits )
{
   tl_assert(isOriginalAtom(mce, data));
   tl_assert(isShadowAtom(mce, vbits));
   tl_assert(sameKindedAtoms(data, vbits));
   return assignNew(
             mce, Ity_I32,
             binop(Iop_Or32,
                   assignNew(mce, Ity_I32, unop(Iop_Not32, data)),
                   vbits) );
}

static IRAtom* mkImproveOR64 ( MCEnv* mce, IRAtom* data, IRAtom* vbits )
{
   tl_assert(isOriginalAtom(mce, data));
   tl_assert(isShadowAtom(mce, vbits));
   tl_assert(sameKindedAtoms(data, vbits));
   return assignNew(
             mce, Ity_I64,
             binop(Iop_Or64,
                   assignNew(mce, Ity_I64, unop(Iop_Not64, data)),
                   vbits) );
}

static IRAtom* mkImproveORV128 ( MCEnv* mce, IRAtom* data, IRAtom* vbits )
{
   tl_assert(isOriginalAtom(mce, data));
   tl_assert(isShadowAtom(mce, vbits));
   tl_assert(sameKindedAtoms(data, vbits));
   return assignNew(
             mce, Ity_V128,
             binop(Iop_OrV128,
                   assignNew(mce, Ity_V128, unop(Iop_NotV128, data)),
                   vbits) );
}

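/* Worked example (illustrative, not from the original source):
   consider And8 where atom1 = 0b00001111 (fully defined) and atom2
   is wholly undefined.  ImproveAND(atom1, vatom1) = 0b00001111,
   whose four 0 (defined) bits record that the defined 0s in atom1
   force the corresponding result bits to 0 regardless of atom2.
   DifD-ing this onto the naive UifU result (see do_And_Or in
   expr2vbits_Binop below) leaves only the low four result bits
   undefined.  ImproveOR is the dual: a defined 1 forces an Or result
   bit to a defined 1. */
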

/* --------- Pessimising casts. --------- */

static IRAtom* mkPCastTo( MCEnv* mce, IRType dst_ty, IRAtom* vbits )
{
   IRType  ty;
   IRAtom* tmp1;
   /* Note, dst_ty is a shadow type, not an original type. */
   /* First of all, collapse vbits down to a single bit. */
   tl_assert(isShadowAtom(mce,vbits));
   ty   = typeOfIRExpr(mce->bb->tyenv, vbits);
   tmp1 = NULL;
   switch (ty) {
      case Ity_I1:
         tmp1 = vbits;
         break;
      case Ity_I8:
         tmp1 = assignNew(mce, Ity_I1, unop(Iop_CmpNEZ8, vbits));
         break;
      case Ity_I16:
         tmp1 = assignNew(mce, Ity_I1, unop(Iop_CmpNEZ16, vbits));
         break;
      case Ity_I32:
         tmp1 = assignNew(mce, Ity_I1, unop(Iop_CmpNEZ32, vbits));
         break;
      case Ity_I64:
         tmp1 = assignNew(mce, Ity_I1, unop(Iop_CmpNEZ64, vbits));
         break;
      case Ity_I128: {
         /* Gah.  Chop it in half, OR the halves together, and compare
            that with zero. */
         IRAtom* tmp2 = assignNew(mce, Ity_I64, unop(Iop_128HIto64, vbits));
         IRAtom* tmp3 = assignNew(mce, Ity_I64, unop(Iop_128to64, vbits));
         IRAtom* tmp4 = assignNew(mce, Ity_I64, binop(Iop_Or64, tmp2, tmp3));
         tmp1         = assignNew(mce, Ity_I1,
                                  unop(Iop_CmpNEZ64, tmp4));
         break;
      }
      default:
         ppIRType(ty);
         VG_(tool_panic)("mkPCastTo(1)");
   }
   tl_assert(tmp1);
   /* Now widen up to the dst type. */
   switch (dst_ty) {
      case Ity_I1:
         return tmp1;
      case Ity_I8:
         return assignNew(mce, Ity_I8, unop(Iop_1Sto8, tmp1));
      case Ity_I16:
         return assignNew(mce, Ity_I16, unop(Iop_1Sto16, tmp1));
      case Ity_I32:
         return assignNew(mce, Ity_I32, unop(Iop_1Sto32, tmp1));
      case Ity_I64:
         return assignNew(mce, Ity_I64, unop(Iop_1Sto64, tmp1));
      case Ity_V128:
         tmp1 = assignNew(mce, Ity_I64,  unop(Iop_1Sto64, tmp1));
         tmp1 = assignNew(mce, Ity_V128, binop(Iop_64HLtoV128, tmp1, tmp1));
         return tmp1;
      case Ity_I128:
         tmp1 = assignNew(mce, Ity_I64,  unop(Iop_1Sto64, tmp1));
         tmp1 = assignNew(mce, Ity_I128, binop(Iop_64HLto128, tmp1, tmp1));
         return tmp1;
      default:
         ppIRType(dst_ty);
         VG_(tool_panic)("mkPCastTo(2)");
   }
}

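/* Worked example (illustrative, not from the original source):
   mkPCastTo is all-or-nothing.  An I32 input of 0x00000000 (fully
   defined) collapses via CmpNEZ32 to 0, and 1Sto64 widens that to an
   all-0s I64 (fully defined).  Any nonzero input, say 0x00010000
   (one undefined bit), collapses to 1 and sign-extends to all 1s
   (fully undefined).  Hence "pessimising": a single undefined input
   bit poisons every output bit. */
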

/* --------- Accurate interpretation of CmpEQ/CmpNE. --------- */
/*
   Normally, we can do CmpEQ/CmpNE by doing UifU on the arguments, and
   PCasting to Ity_I1.  However, sometimes it is necessary to be more
   accurate.  The insight is that the result is defined if two
   corresponding bits can be found, one from each argument, so that
   both bits are defined but are different -- that makes EQ say "No"
   and NE say "Yes".  Hence, we compute an improvement term and DifD
   it onto the "normal" (UifU) result.

   The result is:

   PCastTo<1> (
      PCastTo<sz>( UifU<sz>(vxx, vyy) )        -- naive version
      `DifD<sz>`
      PCastTo<sz>( CmpEQ<sz>( vec, 1....1 ) )  -- improvement term
   )
   where
     vec contains 0 (defined) bits where the corresponding arg bits
     are defined but different, and 1 bits otherwise:

     vec = UifU<sz>( vxx, vyy, Not<sz>(Xor<sz>( xx, yy )) )
*/
static IRAtom* expensiveCmpEQorNE ( MCEnv*  mce,
                                    IRType  ty,
                                    IRAtom* vxx, IRAtom* vyy,
                                    IRAtom* xx,  IRAtom* yy )
{
   IRAtom *naive, *vec, *vec_cmpd, *improved, *final_cast, *top;
   IROp   opDIFD, opUIFU, opXOR, opNOT, opCMP;

   tl_assert(isShadowAtom(mce,vxx));
   tl_assert(isShadowAtom(mce,vyy));
   tl_assert(isOriginalAtom(mce,xx));
   tl_assert(isOriginalAtom(mce,yy));
   tl_assert(sameKindedAtoms(vxx,xx));
   tl_assert(sameKindedAtoms(vyy,yy));

   switch (ty) {
      case Ity_I32:
         opDIFD = Iop_And32;
         opUIFU = Iop_Or32;
         opNOT  = Iop_Not32;
         opXOR  = Iop_Xor32;
         opCMP  = Iop_CmpEQ32;
         top    = mkU32(0xFFFFFFFF);
         break;
      case Ity_I64:
         opDIFD = Iop_And64;
         opUIFU = Iop_Or64;
         opNOT  = Iop_Not64;
         opXOR  = Iop_Xor64;
         opCMP  = Iop_CmpEQ64;
         top    = mkU64(0xFFFFFFFFFFFFFFFFULL);
         break;
      default:
         VG_(tool_panic)("expensiveCmpEQorNE");
   }

   naive
      = mkPCastTo(mce,ty, assignNew(mce, ty, binop(opUIFU, vxx, vyy)));

   vec
      = assignNew(
           mce,ty,
           binop( opUIFU,
                  assignNew(mce,ty, binop(opUIFU, vxx, vyy)),
                  assignNew(
                     mce,ty,
                     unop( opNOT,
                           assignNew(mce,ty, binop(opXOR, xx, yy))))));

   vec_cmpd
      = mkPCastTo( mce,ty, assignNew(mce,Ity_I1, binop(opCMP, vec, top)));

   improved
      = assignNew( mce,ty, binop(opDIFD, naive, vec_cmpd) );

   final_cast
      = mkPCastTo( mce, Ity_I1, improved );

   return final_cast;
}

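/* Worked example (illustrative, not from the original source): let
   xx = 0b10.. and yy = 0b00.. where the low bits are undefined in
   both, but bit 1 is defined in both and differs.  The naive UifU
   path calls the comparison undefined.  However vec then has a 0
   (defined-and-different) at bit 1, so CmpEQ(vec, 1...1) is False,
   the improvement term PCasts to all 0s (defined), and the DifD
   forces the final verdict to 'defined' -- EQ is certainly False and
   NE certainly True, whatever the undefined bits hold. */
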

/*------------------------------------------------------------*/
/*--- Emit a test and complaint if something is undefined. ---*/
/*------------------------------------------------------------*/

/* Set the annotations on a dirty helper to indicate that the stack
   pointer and instruction pointers might be read.  This is the
   behaviour of all 'emit-a-complaint' style functions we might
   call. */

static void setHelperAnns ( MCEnv* mce, IRDirty* di ) {
   di->nFxState = 2;
   di->fxState[0].fx     = Ifx_Read;
   di->fxState[0].offset = mce->layout->offset_SP;
   di->fxState[0].size   = mce->layout->sizeof_SP;
   di->fxState[1].fx     = Ifx_Read;
   di->fxState[1].offset = mce->layout->offset_IP;
   di->fxState[1].size   = mce->layout->sizeof_IP;
}


/* Check the supplied **original** atom for undefinedness, and emit a
   complaint if so.  Once that happens, mark it as defined.  This is
   possible because the atom is either a tmp or literal.  If it's a
   tmp, it will be shadowed by a tmp, and so we can set the shadow to
   be defined.  In fact as mentioned above, we will have to allocate a
   new tmp to carry the new 'defined' shadow value, and update the
   original->tmp mapping accordingly; we cannot simply assign a new
   value to an existing shadow tmp as this breaks SSAness -- resulting
   in the post-instrumentation sanity checker spluttering in
   disapproval.
*/
static void complainIfUndefined ( MCEnv* mce, IRAtom* atom )
{
   IRAtom*  vatom;
   IRType   ty;
   Int      sz;
   IRDirty* di;
   IRAtom*  cond;

   /* Since the original expression is atomic, there's no duplicated
      work generated by making multiple V-expressions for it.  So we
      don't really care about the possibility that someone else may
      also create a V-interpretation for it. */
   tl_assert(isOriginalAtom(mce, atom));
   vatom = expr2vbits( mce, atom );
   tl_assert(isShadowAtom(mce, vatom));
   tl_assert(sameKindedAtoms(atom, vatom));

   ty = typeOfIRExpr(mce->bb->tyenv, vatom);

   /* sz is only used for constructing the error message */
   sz = ty==Ity_I1 ? 0 : sizeofIRType(ty);

   cond = mkPCastTo( mce, Ity_I1, vatom );
   /* cond will be 0 if all defined, and 1 if any not defined. */

   switch (sz) {
      case 0:
         di = unsafeIRDirty_0_N( 0/*regparms*/,
                                 "MC_(helperc_value_check0_fail)",
                                 &MC_(helperc_value_check0_fail),
                                 mkIRExprVec_0()
                               );
         break;
      case 1:
         di = unsafeIRDirty_0_N( 0/*regparms*/,
                                 "MC_(helperc_value_check1_fail)",
                                 &MC_(helperc_value_check1_fail),
                                 mkIRExprVec_0()
                               );
         break;
      case 4:
         di = unsafeIRDirty_0_N( 0/*regparms*/,
                                 "MC_(helperc_value_check4_fail)",
                                 &MC_(helperc_value_check4_fail),
                                 mkIRExprVec_0()
                               );
         break;
      case 8:
         di = unsafeIRDirty_0_N( 0/*regparms*/,
                                 "MC_(helperc_value_check8_fail)",
                                 &MC_(helperc_value_check8_fail),
                                 mkIRExprVec_0()
                               );
         break;
      default:
         di = unsafeIRDirty_0_N( 1/*regparms*/,
                                 "MC_(helperc_complain_undef)",
                                 &MC_(helperc_complain_undef),
                                 mkIRExprVec_1( mkIRExpr_HWord( sz ))
                               );
         break;
   }
   di->guard = cond;
   setHelperAnns( mce, di );
   stmt( mce->bb, IRStmt_Dirty(di));

   /* Set the shadow tmp to be defined.  First, update the
      orig->shadow tmp mapping to reflect the fact that this shadow is
      getting a new value. */
   tl_assert(isIRAtom(vatom));
   /* sameKindedAtoms ... */
   if (vatom->tag == Iex_Tmp) {
      tl_assert(atom->tag == Iex_Tmp);
      newShadowTmp(mce, atom->Iex.Tmp.tmp);
      assign(mce->bb, findShadowTmp(mce, atom->Iex.Tmp.tmp),
             definedOfType(ty));
   }
}

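/* Note (illustrative, not from the original source): the dirty call
   above is emitted unconditionally into the instrumented bb, but
   'di->guard = cond' means the helper -- and hence the complaint --
   only runs at execution time when cond is 1, i.e. when at least one
   V bit of the tested atom is undefined.  The common all-defined
   case therefore costs only the guard evaluation, not a call. */
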

/*------------------------------------------------------------*/
/*--- Shadowing PUTs/GETs, and indexed variants thereof    ---*/
/*------------------------------------------------------------*/

/* Examine the always-defined sections declared in layout to see if
   the (offset,size) section is within one.  Note, it is an error to
   partially fall into such a region: (offset,size) should either be
   completely in such a region or completely not-in such a region.
*/
static Bool isAlwaysDefd ( MCEnv* mce, Int offset, Int size )
{
   Int minoffD, maxoffD, i;
   Int minoff = offset;
   Int maxoff = minoff + size - 1;
   tl_assert((minoff & ~0xFFFF) == 0);
   tl_assert((maxoff & ~0xFFFF) == 0);

   for (i = 0; i < mce->layout->n_alwaysDefd; i++) {
      minoffD = mce->layout->alwaysDefd[i].offset;
      maxoffD = minoffD + mce->layout->alwaysDefd[i].size - 1;
      tl_assert((minoffD & ~0xFFFF) == 0);
      tl_assert((maxoffD & ~0xFFFF) == 0);

      if (maxoff < minoffD || maxoffD < minoff)
         continue; /* no overlap */
      if (minoff >= minoffD && maxoff <= maxoffD)
         return True; /* completely contained in an always-defd section */

      VG_(tool_panic)("memcheck:isAlwaysDefd:partial overlap");
   }
   return False; /* could not find any containing section */
}


/* Generate into bb suitable actions to shadow this Put.  If the state
   slice is marked 'always defined', do nothing.  Otherwise, write the
   supplied V bits to the shadow state.  We can pass in either an
   original atom or a V-atom, but not both.  In the former case the
   relevant V-bits are then generated from the original.
*/
static
void do_shadow_PUT ( MCEnv* mce,  Int offset,
                     IRAtom* atom, IRAtom* vatom )
{
   IRType ty;
   if (atom) {
      tl_assert(!vatom);
      tl_assert(isOriginalAtom(mce, atom));
      vatom = expr2vbits( mce, atom );
   } else {
      tl_assert(vatom);
      tl_assert(isShadowAtom(mce, vatom));
   }

   ty = typeOfIRExpr(mce->bb->tyenv, vatom);
   tl_assert(ty != Ity_I1);
   if (isAlwaysDefd(mce, offset, sizeofIRType(ty))) {
      /* later: no ... */
      /* emit code to emit a complaint if any of the vbits are 1. */
      /* complainIfUndefined(mce, atom); */
   } else {
      /* Do a plain shadow Put. */
      stmt( mce->bb, IRStmt_Put( offset + mce->layout->total_sizeB, vatom ) );
   }
}


/* Generate into bb suitable actions to shadow this PutI.  As with
   do_shadow_PUT, if the referenced state slice is marked 'always
   defined', do nothing; otherwise write the V bits via a cloned PutI
   that refers to the shadow area.
*/
static
void do_shadow_PUTI ( MCEnv* mce,
                      IRArray* descr, IRAtom* ix, Int bias, IRAtom* atom )
{
   IRAtom* vatom;
   IRType  ty, tyS;
   Int     arrSize;

   tl_assert(isOriginalAtom(mce,atom));
   vatom = expr2vbits( mce, atom );
   tl_assert(sameKindedAtoms(atom, vatom));
   ty  = descr->elemTy;
   tyS = shadowType(ty);
   arrSize = descr->nElems * sizeofIRType(ty);
   tl_assert(ty != Ity_I1);
   tl_assert(isOriginalAtom(mce,ix));
   complainIfUndefined(mce,ix);
   if (isAlwaysDefd(mce, descr->base, arrSize)) {
      /* later: no ... */
      /* emit code to emit a complaint if any of the vbits are 1. */
      /* complainIfUndefined(mce, atom); */
   } else {
      /* Do a cloned version of the Put that refers to the shadow
         area. */
      IRArray* new_descr
         = mkIRArray( descr->base + mce->layout->total_sizeB,
                      tyS, descr->nElems);
      stmt( mce->bb, IRStmt_PutI( new_descr, ix, bias, vatom ));
   }
}


/* Return an expression which contains the V bits corresponding to the
   given GET (passed in in pieces).
*/
static
IRExpr* shadow_GET ( MCEnv* mce, Int offset, IRType ty )
{
   IRType tyS = shadowType(ty);
   tl_assert(ty != Ity_I1);
   if (isAlwaysDefd(mce, offset, sizeofIRType(ty))) {
      /* Always defined, return all zeroes of the relevant type */
      return definedOfType(tyS);
   } else {
      /* return a cloned version of the Get that refers to the shadow
         area. */
      return IRExpr_Get( offset + mce->layout->total_sizeB, tyS );
   }
}


/* Return an expression which contains the V bits corresponding to the
   given GETI (passed in in pieces).
*/
static
IRExpr* shadow_GETI ( MCEnv* mce, IRArray* descr, IRAtom* ix, Int bias )
{
   IRType ty   = descr->elemTy;
   IRType tyS  = shadowType(ty);
   Int arrSize = descr->nElems * sizeofIRType(ty);
   tl_assert(ty != Ity_I1);
   tl_assert(isOriginalAtom(mce,ix));
   complainIfUndefined(mce,ix);
   if (isAlwaysDefd(mce, descr->base, arrSize)) {
      /* Always defined, return all zeroes of the relevant type */
      return definedOfType(tyS);
   } else {
      /* return a cloned version of the Get that refers to the shadow
         area. */
      IRArray* new_descr
         = mkIRArray( descr->base + mce->layout->total_sizeB,
                      tyS, descr->nElems);
      return IRExpr_GetI( new_descr, ix, bias );
   }
}

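/* Note (illustrative, not from the original source): all four
   routines above rely on the same layout convention -- the shadow
   guest state is a second copy of the guest state placed
   layout->total_sizeB bytes after the original, so the slice at
   offset o is shadowed by the slice at o + total_sizeB.  E.g. if
   total_sizeB were 1000 (an illustrative value only), a Put at
   offset 24 would be shadowed by a Put of the V bits at offset
   1024. */
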

/*------------------------------------------------------------*/
/*--- Generating approximations for unknown operations,    ---*/
/*--- using lazy-propagate semantics                       ---*/
/*------------------------------------------------------------*/

/* Lazy propagation of undefinedness from two values, resulting in the
   specified shadow type.
*/
static
IRAtom* mkLazy2 ( MCEnv* mce, IRType finalVty, IRAtom* va1, IRAtom* va2 )
{
   IRAtom* at;
   IRType t1 = typeOfIRExpr(mce->bb->tyenv, va1);
   IRType t2 = typeOfIRExpr(mce->bb->tyenv, va2);
   tl_assert(isShadowAtom(mce,va1));
   tl_assert(isShadowAtom(mce,va2));

   /* The general case is inefficient because PCast is an expensive
      operation.  Here are some special cases which use PCast only
      once rather than twice. */

   /* I64 x I64 -> I64 */
   if (t1 == Ity_I64 && t2 == Ity_I64 && finalVty == Ity_I64) {
      if (0) VG_(printf)("mkLazy2: I64 x I64 -> I64\n");
      at = mkUifU(mce, Ity_I64, va1, va2);
      at = mkPCastTo(mce, Ity_I64, at);
      return at;
   }

   /* I64 x I64 -> I32 */
   if (t1 == Ity_I64 && t2 == Ity_I64 && finalVty == Ity_I32) {
      if (0) VG_(printf)("mkLazy2: I64 x I64 -> I32\n");
      at = mkUifU(mce, Ity_I64, va1, va2);
      at = mkPCastTo(mce, Ity_I32, at);
      return at;
   }

   if (0) {
      VG_(printf)("mkLazy2 ");
      ppIRType(t1);
      VG_(printf)("_");
      ppIRType(t2);
      VG_(printf)("_");
      ppIRType(finalVty);
      VG_(printf)("\n");
   }

   /* General case: force everything via 32-bit intermediaries. */
   at = mkPCastTo(mce, Ity_I32, va1);
   at = mkUifU(mce, Ity_I32, at, mkPCastTo(mce, Ity_I32, va2));
   at = mkPCastTo(mce, finalVty, at);
   return at;
}


/* Do the lazy propagation game from a null-terminated vector of
   atoms.  This is presumably the arguments to a helper call, so the
   IRCallee info is also supplied in order that we can know which
   arguments should be ignored (via the .mcx_mask field).
*/
static
IRAtom* mkLazyN ( MCEnv* mce,
                  IRAtom** exprvec, IRType finalVtype, IRCallee* cee )
{
   Int i;
   IRAtom* here;
   IRAtom* curr = definedOfType(Ity_I32);
   for (i = 0; exprvec[i]; i++) {
      tl_assert(i < 32);
      tl_assert(isOriginalAtom(mce, exprvec[i]));
      /* Only take notice of this arg if the callee's mc-exclusion
         mask does not say it is to be excluded. */
      if (cee->mcx_mask & (1<<i)) {
         /* the arg is to be excluded from definedness checking.  Do
            nothing. */
         if (0) VG_(printf)("excluding %s(%d)\n", cee->name, i);
      } else {
         /* calculate the arg's definedness, and pessimistically merge
            it in. */
         here = mkPCastTo( mce, Ity_I32, expr2vbits(mce, exprvec[i]) );
         curr = mkUifU32(mce, here, curr);
      }
   }
   return mkPCastTo(mce, finalVtype, curr );
}

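/* Note (illustrative, not from the original source): in mkLazyN,
   bit i of cee->mcx_mask set means "do not track definedness of
   argument i".  A callee with mcx_mask == 0 has every argument
   checked, whereas e.g. mcx_mask == 5 (an illustrative value, bits 0
   and 2 set) would merge in the V bits of argument 1 only.  The
   result is fully defined exactly when every tracked argument is
   fully defined. */
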

/*------------------------------------------------------------*/
/*--- Generating expensive sequences for exact carry-chain ---*/
/*--- propagation in add/sub and related operations.       ---*/
/*------------------------------------------------------------*/

static
IRAtom* expensiveAddSub ( MCEnv*  mce,
                          Bool    add,
                          IRType  ty,
                          IRAtom* qaa, IRAtom* qbb,
                          IRAtom* aa,  IRAtom* bb )
{
   IRAtom *a_min, *b_min, *a_max, *b_max;
   IROp   opAND, opOR, opXOR, opNOT, opADD, opSUB;

   tl_assert(isShadowAtom(mce,qaa));
   tl_assert(isShadowAtom(mce,qbb));
   tl_assert(isOriginalAtom(mce,aa));
   tl_assert(isOriginalAtom(mce,bb));
   tl_assert(sameKindedAtoms(qaa,aa));
   tl_assert(sameKindedAtoms(qbb,bb));

   switch (ty) {
      case Ity_I32:
         opAND = Iop_And32;
         opOR  = Iop_Or32;
         opXOR = Iop_Xor32;
         opNOT = Iop_Not32;
         opADD = Iop_Add32;
         opSUB = Iop_Sub32;
         break;
      default:
         VG_(tool_panic)("expensiveAddSub");
   }

   // a_min = aa & ~qaa
   a_min = assignNew(mce,ty,
                     binop(opAND, aa,
                           assignNew(mce,ty, unop(opNOT, qaa))));

   // b_min = bb & ~qbb
   b_min = assignNew(mce,ty,
                     binop(opAND, bb,
                           assignNew(mce,ty, unop(opNOT, qbb))));

   // a_max = aa | qaa
   a_max = assignNew(mce,ty, binop(opOR, aa, qaa));

   // b_max = bb | qbb
   b_max = assignNew(mce,ty, binop(opOR, bb, qbb));

   if (add) {
      // result = (qaa | qbb) | ((a_min + b_min) ^ (a_max + b_max))
      return
      assignNew(mce,ty,
                binop( opOR,
                       assignNew(mce,ty, binop(opOR, qaa, qbb)),
                       assignNew(mce,ty,
                                 binop( opXOR,
                                        assignNew(mce,ty, binop(opADD, a_min, b_min)),
                                        assignNew(mce,ty, binop(opADD, a_max, b_max))
                                 )
                       )
                )
      );
   } else {
      // result = (qaa | qbb) | ((a_min - b_max) ^ (a_max - b_min))
      return
      assignNew(mce,ty,
                binop( opOR,
                       assignNew(mce,ty, binop(opOR, qaa, qbb)),
                       assignNew(mce,ty,
                                 binop( opXOR,
                                        assignNew(mce,ty, binop(opSUB, a_min, b_max)),
                                        assignNew(mce,ty, binop(opSUB, a_max, b_min))
                                 )
                       )
                )
      );
   }
}

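/* Worked example (illustrative, not from the original source):
   a_min clears every undefined bit of aa and a_max sets it, so the
   true value of aa always lies in [a_min, a_max].  For an add, the
   true sum therefore lies between a_min + b_min and a_max + b_max,
   and any result bit that agrees in those two extreme sums cannot
   depend on the undefined inputs.  E.g. aa with bit 1 undefined
   (qaa = 0b0010, defined bits 0b0100) gives a_min = 0b0100,
   a_max = 0b0110; adding a fully-defined bb = 0b0001 yields 0b0101
   vs 0b0111, whose XOR 0b0010 marks just bit 1 -- exactly the
   (qaa | qbb) | (min ^ max) formula above. */
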

/*------------------------------------------------------------*/
/*--- Helpers for dealing with vector primops.             ---*/
/*------------------------------------------------------------*/

/* Vector pessimisation -- pessimise within each lane individually. */

static IRAtom* mkPCast8x16 ( MCEnv* mce, IRAtom* at )
{
   return assignNew(mce, Ity_V128, unop(Iop_CmpNEZ8x16, at));
}

static IRAtom* mkPCast16x8 ( MCEnv* mce, IRAtom* at )
{
   return assignNew(mce, Ity_V128, unop(Iop_CmpNEZ16x8, at));
}

static IRAtom* mkPCast32x4 ( MCEnv* mce, IRAtom* at )
{
   return assignNew(mce, Ity_V128, unop(Iop_CmpNEZ32x4, at));
}

static IRAtom* mkPCast64x2 ( MCEnv* mce, IRAtom* at )
{
   return assignNew(mce, Ity_V128, unop(Iop_CmpNEZ64x2, at));
}

static IRAtom* mkPCast32x2 ( MCEnv* mce, IRAtom* at )
{
   return assignNew(mce, Ity_I64, unop(Iop_CmpNEZ32x2, at));
}

static IRAtom* mkPCast16x4 ( MCEnv* mce, IRAtom* at )
{
   return assignNew(mce, Ity_I64, unop(Iop_CmpNEZ16x4, at));
}

static IRAtom* mkPCast8x8 ( MCEnv* mce, IRAtom* at )
{
   return assignNew(mce, Ity_I64, unop(Iop_CmpNEZ8x8, at));
}

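/* Note (illustrative, not from the original source): these are
   lane-wise analogues of mkPCastTo.  E.g. CmpNEZ32x4 turns each
   32-bit lane into all 0s if that lane's V bits were all 0 (defined)
   and into all 1s otherwise, so undefinedness stays confined to the
   lanes it came from instead of poisoning the whole vector. */
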

/* Here's a simple scheme capable of handling ops derived from SSE1
   code, while only generating ops that can be efficiently implemented
   in SSE1. */

/* All-lanes versions are straightforward:

   binary32Fx4(x,y)   ==> PCast32x4(UifUV128(x#,y#))

   unary32Fx4(x)      ==> PCast32x4(x#)

   Lowest-lane-only versions are more complex:

   binary32F0x4(x,y)  ==> SetV128lo32(
                             x#,
                             PCast32(V128to32(UifUV128(x#,y#)))
                          )

   This is perhaps not so obvious.  In particular, it's faster to
   do a V128-bit UifU and then take the bottom 32 bits than the more
   obvious scheme of taking the bottom 32 bits of each operand
   and doing a 32-bit UifU.  Basically since UifU is fast and
   chopping lanes off vector values is slow.

   Finally:

   unary32F0x4(x)     ==> SetV128lo32(
                             x#,
                             PCast32(V128to32(x#))
                          )

   Where:

   PCast32(v#)   = 1Sto32(CmpNE32(v#,0))
   PCast32x4(v#) = CmpNEZ32x4(v#)
*/

static
IRAtom* binary32Fx4 ( MCEnv* mce, IRAtom* vatomX, IRAtom* vatomY )
{
   IRAtom* at;
   tl_assert(isShadowAtom(mce, vatomX));
   tl_assert(isShadowAtom(mce, vatomY));
   at = mkUifUV128(mce, vatomX, vatomY);
   at = assignNew(mce, Ity_V128, mkPCast32x4(mce, at));
   return at;
}

static
IRAtom* unary32Fx4 ( MCEnv* mce, IRAtom* vatomX )
{
   IRAtom* at;
   tl_assert(isShadowAtom(mce, vatomX));
   at = assignNew(mce, Ity_V128, mkPCast32x4(mce, vatomX));
   return at;
}

static
IRAtom* binary32F0x4 ( MCEnv* mce, IRAtom* vatomX, IRAtom* vatomY )
{
   IRAtom* at;
   tl_assert(isShadowAtom(mce, vatomX));
   tl_assert(isShadowAtom(mce, vatomY));
   at = mkUifUV128(mce, vatomX, vatomY);
   at = assignNew(mce, Ity_I32, unop(Iop_V128to32, at));
   at = mkPCastTo(mce, Ity_I32, at);
   at = assignNew(mce, Ity_V128, binop(Iop_SetV128lo32, vatomX, at));
   return at;
}

static
IRAtom* unary32F0x4 ( MCEnv* mce, IRAtom* vatomX )
{
   IRAtom* at;
   tl_assert(isShadowAtom(mce, vatomX));
   at = assignNew(mce, Ity_I32, unop(Iop_V128to32, vatomX));
   at = mkPCastTo(mce, Ity_I32, at);
   at = assignNew(mce, Ity_V128, binop(Iop_SetV128lo32, vatomX, at));
   return at;
}

/* --- ... and ... 64Fx2 versions of the same ... --- */

static
IRAtom* binary64Fx2 ( MCEnv* mce, IRAtom* vatomX, IRAtom* vatomY )
{
   IRAtom* at;
   tl_assert(isShadowAtom(mce, vatomX));
   tl_assert(isShadowAtom(mce, vatomY));
   at = mkUifUV128(mce, vatomX, vatomY);
   at = assignNew(mce, Ity_V128, mkPCast64x2(mce, at));
   return at;
}

static
IRAtom* unary64Fx2 ( MCEnv* mce, IRAtom* vatomX )
{
   IRAtom* at;
   tl_assert(isShadowAtom(mce, vatomX));
   at = assignNew(mce, Ity_V128, mkPCast64x2(mce, vatomX));
   return at;
}

static
IRAtom* binary64F0x2 ( MCEnv* mce, IRAtom* vatomX, IRAtom* vatomY )
{
   IRAtom* at;
   tl_assert(isShadowAtom(mce, vatomX));
   tl_assert(isShadowAtom(mce, vatomY));
   at = mkUifUV128(mce, vatomX, vatomY);
   at = assignNew(mce, Ity_I64, unop(Iop_V128to64, at));
   at = mkPCastTo(mce, Ity_I64, at);
   at = assignNew(mce, Ity_V128, binop(Iop_SetV128lo64, vatomX, at));
   return at;
}

static
IRAtom* unary64F0x2 ( MCEnv* mce, IRAtom* vatomX )
{
   IRAtom* at;
   tl_assert(isShadowAtom(mce, vatomX));
   at = assignNew(mce, Ity_I64, unop(Iop_V128to64, vatomX));
   at = mkPCastTo(mce, Ity_I64, at);
   at = assignNew(mce, Ity_V128, binop(Iop_SetV128lo64, vatomX, at));
   return at;
}


/* --- --- Vector saturated narrowing --- --- */

/* This is quite subtle.  What to do is simple:

   Let the original narrowing op be QNarrowW{S,U}xN.  Produce:

      the-narrowing-op( PCastWxN(vatom1), PCastWxN(vatom2))

   Why this is right is not so simple.  Consider a lane in the args,
   vatom1 or 2, doesn't matter.

   After the PCast, that lane is all 0s (defined) or all
   1s (undefined).

   Both signed and unsigned saturating narrowing of all 0s produces
   all 0s, which is what we want.

   The all-1s case is more complex.  Unsigned narrowing interprets an
   all-1s input as the largest unsigned integer, and so produces all
   1s as a result since that is the largest unsigned value at the
   smaller width.

   Signed narrowing interprets all 1s as -1.  Fortunately, -1 narrows
   to -1, so we still wind up with all 1s at the smaller width.

   So: In short, pessimise the args, then apply the original narrowing
   op.
*/
static
IRAtom* vectorNarrowV128 ( MCEnv* mce, IROp narrow_op,
                           IRAtom* vatom1, IRAtom* vatom2)
{
   IRAtom *at1, *at2, *at3;
   IRAtom* (*pcast)( MCEnv*, IRAtom* );
   switch (narrow_op) {
      case Iop_QNarrow32Sx4: pcast = mkPCast32x4; break;
      case Iop_QNarrow16Sx8: pcast = mkPCast16x8; break;
      case Iop_QNarrow16Ux8: pcast = mkPCast16x8; break;
      default: VG_(tool_panic)("vectorNarrowV128");
   }
   tl_assert(isShadowAtom(mce,vatom1));
   tl_assert(isShadowAtom(mce,vatom2));
   at1 = assignNew(mce, Ity_V128, pcast(mce, vatom1));
   at2 = assignNew(mce, Ity_V128, pcast(mce, vatom2));
   at3 = assignNew(mce, Ity_V128, binop(narrow_op, at1, at2));
   return at3;
}

static
IRAtom* vectorNarrow64 ( MCEnv* mce, IROp narrow_op,
                         IRAtom* vatom1, IRAtom* vatom2)
{
   IRAtom *at1, *at2, *at3;
   IRAtom* (*pcast)( MCEnv*, IRAtom* );
   switch (narrow_op) {
      case Iop_QNarrow32Sx2: pcast = mkPCast32x2; break;
      case Iop_QNarrow16Sx4: pcast = mkPCast16x4; break;
      case Iop_QNarrow16Ux4: pcast = mkPCast16x4; break;
      default: VG_(tool_panic)("vectorNarrow64");
   }
   tl_assert(isShadowAtom(mce,vatom1));
   tl_assert(isShadowAtom(mce,vatom2));
   at1 = assignNew(mce, Ity_I64, pcast(mce, vatom1));
   at2 = assignNew(mce, Ity_I64, pcast(mce, vatom2));
   at3 = assignNew(mce, Ity_I64, binop(narrow_op, at1, at2));
   return at3;
}


/* --- --- Vector integer arithmetic --- --- */

/* Simple ... UifU the args and per-lane pessimise the results. */

/* --- V128-bit versions --- */

static
IRAtom* binary8Ix16 ( MCEnv* mce, IRAtom* vatom1, IRAtom* vatom2 )
{
   IRAtom* at;
   at = mkUifUV128(mce, vatom1, vatom2);
   at = mkPCast8x16(mce, at);
   return at;
}

static
IRAtom* binary16Ix8 ( MCEnv* mce, IRAtom* vatom1, IRAtom* vatom2 )
{
   IRAtom* at;
   at = mkUifUV128(mce, vatom1, vatom2);
   at = mkPCast16x8(mce, at);
   return at;
}

static
IRAtom* binary32Ix4 ( MCEnv* mce, IRAtom* vatom1, IRAtom* vatom2 )
{
   IRAtom* at;
   at = mkUifUV128(mce, vatom1, vatom2);
   at = mkPCast32x4(mce, at);
   return at;
}

static
IRAtom* binary64Ix2 ( MCEnv* mce, IRAtom* vatom1, IRAtom* vatom2 )
{
   IRAtom* at;
   at = mkUifUV128(mce, vatom1, vatom2);
   at = mkPCast64x2(mce, at);
   return at;
}

/* --- 64-bit versions --- */

static
IRAtom* binary8Ix8 ( MCEnv* mce, IRAtom* vatom1, IRAtom* vatom2 )
{
   IRAtom* at;
   at = mkUifU64(mce, vatom1, vatom2);
   at = mkPCast8x8(mce, at);
   return at;
}

static
IRAtom* binary16Ix4 ( MCEnv* mce, IRAtom* vatom1, IRAtom* vatom2 )
{
   IRAtom* at;
   at = mkUifU64(mce, vatom1, vatom2);
   at = mkPCast16x4(mce, at);
   return at;
}

static
IRAtom* binary32Ix2 ( MCEnv* mce, IRAtom* vatom1, IRAtom* vatom2 )
{
   IRAtom* at;
   at = mkUifU64(mce, vatom1, vatom2);
   at = mkPCast32x2(mce, at);
   return at;
}


/*------------------------------------------------------------*/
/*--- Generate shadow values from all kinds of IRExprs.    ---*/
/*------------------------------------------------------------*/

static
IRAtom* expr2vbits_Binop ( MCEnv* mce,
                           IROp op,
                           IRAtom* atom1, IRAtom* atom2 )
{
   IRType  and_or_ty;
   IRAtom* (*uifu)    (MCEnv*, IRAtom*, IRAtom*);
   IRAtom* (*difd)    (MCEnv*, IRAtom*, IRAtom*);
   IRAtom* (*improve) (MCEnv*, IRAtom*, IRAtom*);

   IRAtom* vatom1 = expr2vbits( mce, atom1 );
   IRAtom* vatom2 = expr2vbits( mce, atom2 );

   tl_assert(isOriginalAtom(mce,atom1));
   tl_assert(isOriginalAtom(mce,atom2));
   tl_assert(isShadowAtom(mce,vatom1));
   tl_assert(isShadowAtom(mce,vatom2));
   tl_assert(sameKindedAtoms(atom1,vatom1));
   tl_assert(sameKindedAtoms(atom2,vatom2));
   switch (op) {

      /* 64-bit SIMD */

      case Iop_ShrN16x4:
      case Iop_ShrN32x2:
      case Iop_SarN16x4:
      case Iop_SarN32x2:
      case Iop_ShlN16x4:
      case Iop_ShlN32x2:
         /* Same scheme as with all other shifts. */
         complainIfUndefined(mce, atom2);
         return assignNew(mce, Ity_I64, binop(op, vatom1, atom2));

      case Iop_QNarrow32Sx2:
      case Iop_QNarrow16Sx4:
      case Iop_QNarrow16Ux4:
         return vectorNarrow64(mce, op, vatom1, vatom2);

      case Iop_Min8Ux8:
      case Iop_Max8Ux8:
      case Iop_Avg8Ux8:
      case Iop_QSub8Sx8:
      case Iop_QSub8Ux8:
      case Iop_Sub8x8:
      case Iop_CmpGT8Sx8:
      case Iop_CmpEQ8x8:
      case Iop_QAdd8Sx8:
      case Iop_QAdd8Ux8:
      case Iop_Add8x8:
         return binary8Ix8(mce, vatom1, vatom2);

      case Iop_Min16Sx4:
      case Iop_Max16Sx4:
      case Iop_Avg16Ux4:
      case Iop_QSub16Ux4:
      case Iop_QSub16Sx4:
      case Iop_Sub16x4:
      case Iop_Mul16x4:
      case Iop_MulHi16Sx4:
      case Iop_MulHi16Ux4:
      case Iop_CmpGT16Sx4:
      case Iop_CmpEQ16x4:
      case Iop_QAdd16Sx4:
      case Iop_QAdd16Ux4:
      case Iop_Add16x4:
         return binary16Ix4(mce, vatom1, vatom2);

      case Iop_Sub32x2:
      case Iop_CmpGT32Sx2:
      case Iop_CmpEQ32x2:
      case Iop_Add32x2:
         return binary32Ix2(mce, vatom1, vatom2);

      /* 64-bit data-steering */
      case Iop_InterleaveLO32x2:
      case Iop_InterleaveLO16x4:
      case Iop_InterleaveLO8x8:
      case Iop_InterleaveHI32x2:
      case Iop_InterleaveHI16x4:
      case Iop_InterleaveHI8x8:
         return assignNew(mce, Ity_I64, binop(op, vatom1, vatom2));

      /* V128-bit SIMD */

      case Iop_ShrN16x8:
      case Iop_ShrN32x4:
      case Iop_ShrN64x2:
      case Iop_SarN16x8:
      case Iop_SarN32x4:
      case Iop_ShlN16x8:
      case Iop_ShlN32x4:
      case Iop_ShlN64x2:
         /* Same scheme as with all other shifts. */
         complainIfUndefined(mce, atom2);
         return assignNew(mce, Ity_V128, binop(op, vatom1, atom2));

      case Iop_QSub8Ux16:
      case Iop_QSub8Sx16:
      case Iop_Sub8x16:
      case Iop_Min8Ux16:
      case Iop_Max8Ux16:
      case Iop_CmpGT8Sx16:
      case Iop_CmpEQ8x16:
      case Iop_Avg8Ux16:
      case Iop_QAdd8Ux16:
      case Iop_QAdd8Sx16:
      case Iop_Add8x16:
         return binary8Ix16(mce, vatom1, vatom2);

      case Iop_QSub16Ux8:
      case Iop_QSub16Sx8:
      case Iop_Sub16x8:
      case Iop_Mul16x8:
      case Iop_MulHi16Sx8:
      case Iop_MulHi16Ux8:
      case Iop_Min16Sx8:
      case Iop_Max16Sx8:
      case Iop_CmpGT16Sx8:
      case Iop_CmpEQ16x8:
      case Iop_Avg16Ux8:
      case Iop_QAdd16Ux8:
      case Iop_QAdd16Sx8:
      case Iop_Add16x8:
         return binary16Ix8(mce, vatom1, vatom2);

      case Iop_Sub32x4:
      case Iop_CmpGT32Sx4:
      case Iop_CmpEQ32x4:
      case Iop_Add32x4:
         return binary32Ix4(mce, vatom1, vatom2);

      case Iop_Sub64x2:
      case Iop_Add64x2:
         return binary64Ix2(mce, vatom1, vatom2);

      case Iop_QNarrow32Sx4:
      case Iop_QNarrow16Sx8:
      case Iop_QNarrow16Ux8:
         return vectorNarrowV128(mce, op, vatom1, vatom2);

      case Iop_Sub64Fx2:
      case Iop_Mul64Fx2:
      case Iop_Min64Fx2:
      case Iop_Max64Fx2:
      case Iop_Div64Fx2:
      case Iop_CmpLT64Fx2:
      case Iop_CmpLE64Fx2:
      case Iop_CmpEQ64Fx2:
      case Iop_Add64Fx2:
         return binary64Fx2(mce, vatom1, vatom2);

      case Iop_Sub64F0x2:
      case Iop_Mul64F0x2:
      case Iop_Min64F0x2:
      case Iop_Max64F0x2:
      case Iop_Div64F0x2:
      case Iop_CmpLT64F0x2:
      case Iop_CmpLE64F0x2:
      case Iop_CmpEQ64F0x2:
      case Iop_Add64F0x2:
         return binary64F0x2(mce, vatom1, vatom2);

      case Iop_Sub32Fx4:
      case Iop_Mul32Fx4:
      case Iop_Min32Fx4:
      case Iop_Max32Fx4:
      case Iop_Div32Fx4:
      case Iop_CmpLT32Fx4:
      case Iop_CmpLE32Fx4:
      case Iop_CmpEQ32Fx4:
      case Iop_Add32Fx4:
         return binary32Fx4(mce, vatom1, vatom2);

      case Iop_Sub32F0x4:
      case Iop_Mul32F0x4:
      case Iop_Min32F0x4:
      case Iop_Max32F0x4:
      case Iop_Div32F0x4:
      case Iop_CmpLT32F0x4:
      case Iop_CmpLE32F0x4:
      case Iop_CmpEQ32F0x4:
      case Iop_Add32F0x4:
         return binary32F0x4(mce, vatom1, vatom2);

      /* V128-bit data-steering */
      case Iop_SetV128lo32:
      case Iop_SetV128lo64:
      case Iop_64HLtoV128:
      case Iop_InterleaveLO64x2:
      case Iop_InterleaveLO32x4:
      case Iop_InterleaveLO16x8:
      case Iop_InterleaveLO8x16:
      case Iop_InterleaveHI64x2:
      case Iop_InterleaveHI32x4:
      case Iop_InterleaveHI16x8:
      case Iop_InterleaveHI8x16:
         return assignNew(mce, Ity_V128, binop(op, vatom1, vatom2));

      /* I128-bit data-steering */
      case Iop_64HLto128:
         return assignNew(mce, Ity_I128, binop(op, vatom1, vatom2));

      /* Scalar floating point */

      case Iop_RoundF64:
      case Iop_F64toI64:
      case Iop_I64toF64:
         /* First arg is I32 (rounding mode), second is F64 or I64
            (data). */
         return mkLazy2(mce, Ity_I64, vatom1, vatom2);

      case Iop_PRemC3210F64: case Iop_PRem1C3210F64:
         /* Takes two F64 args. */
      case Iop_F64toI32:
      case Iop_F64toF32:
         /* First arg is I32 (rounding mode), second is F64 (data). */
         return mkLazy2(mce, Ity_I32, vatom1, vatom2);

      case Iop_F64toI16:
         /* First arg is I32 (rounding mode), second is F64 (data). */
         return mkLazy2(mce, Ity_I16, vatom1, vatom2);

      case Iop_ScaleF64:
      case Iop_Yl2xF64:
      case Iop_Yl2xp1F64:
      case Iop_PRemF64:
      case Iop_PRem1F64:
      case Iop_AtanF64:
      case Iop_AddF64:
      case Iop_DivF64:
      case Iop_SubF64:
      case Iop_MulF64:
         return mkLazy2(mce, Ity_I64, vatom1, vatom2);

      case Iop_CmpF64:
         return mkLazy2(mce, Ity_I32, vatom1, vatom2);

      /* non-FP after here */

      case Iop_DivModU64to32:
      case Iop_DivModS64to32:
         return mkLazy2(mce, Ity_I64, vatom1, vatom2);

      case Iop_DivModU128to64:
      case Iop_DivModS128to64:
         return mkLazy2(mce, Ity_I128, vatom1, vatom2);

      case Iop_16HLto32:
         return assignNew(mce, Ity_I32, binop(op, vatom1, vatom2));
      case Iop_32HLto64:
         return assignNew(mce, Ity_I64, binop(op, vatom1, vatom2));

      case Iop_MullS64:
      case Iop_MullU64: {
         IRAtom* vLo64 = mkLeft64(mce, mkUifU64(mce, vatom1,vatom2));
         IRAtom* vHi64 = mkPCastTo(mce, Ity_I64, vLo64);
         return assignNew(mce, Ity_I128, binop(Iop_64HLto128, vHi64, vLo64));
      }

      case Iop_MullS32:
      case Iop_MullU32: {
         IRAtom* vLo32 = mkLeft32(mce, mkUifU32(mce, vatom1,vatom2));
         IRAtom* vHi32 = mkPCastTo(mce, Ity_I32, vLo32);
         return assignNew(mce, Ity_I64, binop(Iop_32HLto64, vHi32, vLo32));
      }

      case Iop_MullS16:
      case Iop_MullU16: {
         IRAtom* vLo16 = mkLeft16(mce, mkUifU16(mce, vatom1,vatom2));
         IRAtom* vHi16 = mkPCastTo(mce, Ity_I16, vLo16);
         return assignNew(mce, Ity_I32, binop(Iop_16HLto32, vHi16, vLo16));
      }

      case Iop_MullS8:
      case Iop_MullU8: {
         IRAtom* vLo8 = mkLeft8(mce, mkUifU8(mce, vatom1,vatom2));
         IRAtom* vHi8 = mkPCastTo(mce, Ity_I8, vLo8);
         return assignNew(mce, Ity_I16, binop(Iop_8HLto16, vHi8, vLo8));
      }

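      /* Note (illustrative, not from the original source): for the
         widening multiplies above, the low half takes the usual
         Left(UifU(..)) approximation; since an undefined bit can
         influence every result bit above it, the high half is then
         simply a pessimising cast of the low half, and the two
         halves are reassembled with the corresponding HLto op. */
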
      case Iop_Add32:
         if (mce->bogusLiterals)
            return expensiveAddSub(mce,True,Ity_I32,
                                   vatom1,vatom2, atom1,atom2);
         else
            goto cheap_AddSub32;
      case Iop_Sub32:
         if (mce->bogusLiterals)
            return expensiveAddSub(mce,False,Ity_I32,
                                   vatom1,vatom2, atom1,atom2);
         else
            goto cheap_AddSub32;

      cheap_AddSub32:
      case Iop_Mul32:
         return mkLeft32(mce, mkUifU32(mce, vatom1,vatom2));

      /* could do better: Add64, Sub64 */
      case Iop_Mul64:
      case Iop_Add64:
      case Iop_Sub64:
         return mkLeft64(mce, mkUifU64(mce, vatom1,vatom2));

      case Iop_Mul16:
      case Iop_Add16:
      case Iop_Sub16:
         return mkLeft16(mce, mkUifU16(mce, vatom1,vatom2));

      case Iop_Sub8:
      case Iop_Add8:
         return mkLeft8(mce, mkUifU8(mce, vatom1,vatom2));

      case Iop_CmpEQ64:
         if (mce->bogusLiterals)
            return expensiveCmpEQorNE(mce,Ity_I64, vatom1,vatom2, atom1,atom2 );
         else
            goto cheap_cmp64;
      cheap_cmp64:
      case Iop_CmpLE64S: case Iop_CmpLE64U:
      case Iop_CmpLT64U: case Iop_CmpLT64S:
      case Iop_CmpNE64:
         return mkPCastTo(mce, Ity_I1, mkUifU64(mce, vatom1,vatom2));

      case Iop_CmpEQ32:
         if (mce->bogusLiterals)
            return expensiveCmpEQorNE(mce,Ity_I32, vatom1,vatom2, atom1,atom2 );
         else
            goto cheap_cmp32;
      cheap_cmp32:
      case Iop_CmpLE32S: case Iop_CmpLE32U:
      case Iop_CmpLT32U: case Iop_CmpLT32S:
      case Iop_CmpNE32:
         return mkPCastTo(mce, Ity_I1, mkUifU32(mce, vatom1,vatom2));

      case Iop_CmpEQ16: case Iop_CmpNE16:
         return mkPCastTo(mce, Ity_I1, mkUifU16(mce, vatom1,vatom2));

      case Iop_CmpEQ8: case Iop_CmpNE8:
         return mkPCastTo(mce, Ity_I1, mkUifU8(mce, vatom1,vatom2));

      case Iop_Shl32: case Iop_Shr32: case Iop_Sar32:
         /* Complain if the shift amount is undefined.  Then simply
            shift the first arg's V bits by the real shift amount. */
         complainIfUndefined(mce, atom2);
         return assignNew(mce, Ity_I32, binop(op, vatom1, atom2));

      case Iop_Shl16: case Iop_Shr16: case Iop_Sar16:
         /* Same scheme as with 32-bit shifts. */
         complainIfUndefined(mce, atom2);
         return assignNew(mce, Ity_I16, binop(op, vatom1, atom2));

      case Iop_Shl8: case Iop_Shr8:
         /* Same scheme as with 32-bit shifts. */
         complainIfUndefined(mce, atom2);
         return assignNew(mce, Ity_I8, binop(op, vatom1, atom2));

      case Iop_Shl64: case Iop_Shr64: case Iop_Sar64:
         /* Same scheme as with 32-bit shifts. */
         complainIfUndefined(mce, atom2);
         return assignNew(mce, Ity_I64, binop(op, vatom1, atom2));

      case Iop_AndV128:
         uifu = mkUifUV128; difd = mkDifDV128;
         and_or_ty = Ity_V128; improve = mkImproveANDV128; goto do_And_Or;
      case Iop_And64:
         uifu = mkUifU64; difd = mkDifD64;
         and_or_ty = Ity_I64; improve = mkImproveAND64; goto do_And_Or;
      case Iop_And32:
         uifu = mkUifU32; difd = mkDifD32;
         and_or_ty = Ity_I32; improve = mkImproveAND32; goto do_And_Or;
      case Iop_And16:
         uifu = mkUifU16; difd = mkDifD16;
         and_or_ty = Ity_I16; improve = mkImproveAND16; goto do_And_Or;
      case Iop_And8:
         uifu = mkUifU8; difd = mkDifD8;
         and_or_ty = Ity_I8; improve = mkImproveAND8; goto do_And_Or;

      case Iop_OrV128:
         uifu = mkUifUV128; difd = mkDifDV128;
         and_or_ty = Ity_V128; improve = mkImproveORV128; goto do_And_Or;
      case Iop_Or64:
         uifu = mkUifU64; difd = mkDifD64;
         and_or_ty = Ity_I64; improve = mkImproveOR64; goto do_And_Or;
      case Iop_Or32:
         uifu = mkUifU32; difd = mkDifD32;
         and_or_ty = Ity_I32; improve = mkImproveOR32; goto do_And_Or;
      case Iop_Or16:
         uifu = mkUifU16; difd = mkDifD16;
         and_or_ty = Ity_I16; improve = mkImproveOR16; goto do_And_Or;
      case Iop_Or8:
         uifu = mkUifU8; difd = mkDifD8;
         and_or_ty = Ity_I8; improve = mkImproveOR8; goto do_And_Or;

      do_And_Or:
         return
         assignNew(
            mce,
            and_or_ty,
            difd(mce, uifu(mce, vatom1, vatom2),
                      difd(mce, improve(mce, atom1, vatom1),
                                improve(mce, atom2, vatom2) ) ) );

      case Iop_Xor8:
         return mkUifU8(mce, vatom1, vatom2);
      case Iop_Xor16:
         return mkUifU16(mce, vatom1, vatom2);
      case Iop_Xor32:
         return mkUifU32(mce, vatom1, vatom2);
      case Iop_Xor64:
         return mkUifU64(mce, vatom1, vatom2);
      case Iop_XorV128:
         return mkUifUV128(mce, vatom1, vatom2);

      default:
         ppIROp(op);
         VG_(tool_panic)("memcheck:expr2vbits_Binop");
   }
}

      case Iop_Xor8:
         return mkUifU8(mce, vatom1, vatom2);
      case Iop_Xor16:
         return mkUifU16(mce, vatom1, vatom2);
      case Iop_Xor32:
         return mkUifU32(mce, vatom1, vatom2);
      case Iop_Xor64:
         return mkUifU64(mce, vatom1, vatom2);
      case Iop_XorV128:
         return mkUifUV128(mce, vatom1, vatom2);

      default:
         ppIROp(op);
         VG_(tool_panic)("memcheck:expr2vbits_Binop");
   }
}


static
IRExpr* expr2vbits_Unop ( MCEnv* mce, IROp op, IRAtom* atom )
{
   IRAtom* vatom = expr2vbits( mce, atom );
   tl_assert(isOriginalAtom(mce,atom));
   switch (op) {

      case Iop_Sqrt64Fx2:
         return unary64Fx2(mce, vatom);

      case Iop_Sqrt64F0x2:
         return unary64F0x2(mce, vatom);

      case Iop_Sqrt32Fx4:
      case Iop_RSqrt32Fx4:
      case Iop_Recip32Fx4:
         return unary32Fx4(mce, vatom);

      case Iop_Sqrt32F0x4:
      case Iop_RSqrt32F0x4:
      case Iop_Recip32F0x4:
         return unary32F0x4(mce, vatom);

      case Iop_32UtoV128:
      case Iop_64UtoV128:
         return assignNew(mce, Ity_V128, unop(op, vatom));

      case Iop_F32toF64:
      case Iop_I32toF64:
      case Iop_NegF64:
      case Iop_SinF64:
      case Iop_CosF64:
      case Iop_TanF64:
      case Iop_SqrtF64:
      case Iop_AbsF64:
      case Iop_2xm1F64:
         return mkPCastTo(mce, Ity_I64, vatom);

      case Iop_Clz32:
      case Iop_Ctz32:
         return mkPCastTo(mce, Ity_I32, vatom);

      case Iop_1Uto64:
      case Iop_8Uto64:
      case Iop_8Sto64:
      case Iop_16Uto64:
      case Iop_16Sto64:
      case Iop_32Sto64:
      case Iop_32Uto64:
      case Iop_V128to64:
      case Iop_V128HIto64:
      case Iop_128HIto64:
      case Iop_128to64:
         return assignNew(mce, Ity_I64, unop(op, vatom));

      case Iop_64to32:
      case Iop_64HIto32:
      case Iop_1Uto32:
      case Iop_8Uto32:
      case Iop_16Uto32:
      case Iop_16Sto32:
      case Iop_8Sto32:
         return assignNew(mce, Ity_I32, unop(op, vatom));

      case Iop_8Sto16:
      case Iop_8Uto16:
      case Iop_32to16:
      case Iop_32HIto16:
      case Iop_64to16:
         return assignNew(mce, Ity_I16, unop(op, vatom));

      case Iop_1Uto8:
      case Iop_16to8:
      case Iop_32to8:
      case Iop_64to8:
         return assignNew(mce, Ity_I8, unop(op, vatom));

      case Iop_32to1:
         return assignNew(mce, Ity_I1, unop(Iop_32to1, vatom));

      case Iop_64to1:
         return assignNew(mce, Ity_I1, unop(Iop_64to1, vatom));

      case Iop_ReinterpF64asI64:
      case Iop_ReinterpI64asF64:
      case Iop_ReinterpI32asF32:
      case Iop_NotV128:
      case Iop_Not64:
      case Iop_Not32:
      case Iop_Not16:
      case Iop_Not8:
      case Iop_Not1:
         return vatom;

      /* Neg* really fall under the Add/Sub banner, and as such you
         might think would qualify for the 'expensive add/sub'
         treatment.  However, in this case, since the implied literal
         is zero (0 - arg), we just do the cheap thing anyway. */
      case Iop_Neg8:
         return mkLeft8(mce, vatom);
      case Iop_Neg16:
         return mkLeft16(mce, vatom);
      case Iop_Neg32:
         return mkLeft32(mce, vatom);

      default:
         ppIROp(op);
         VG_(tool_panic)("memcheck:expr2vbits_Unop");
   }
}
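
/* For exposition: several cases above use mkPCastTo as a "pessimistic
   cast": if any input V bit is undefined, the whole result at the
   target width is marked undefined; otherwise it is fully defined.
   A scalar model of casting 32-bit V bits to 64 bits: */
#if 0
static ULong pcast_32_to_64 ( UInt vbits )
{
   return vbits == 0 ? 0ULL : ~0ULL;   /* all-defined or all-undefined */
}
#endif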
/* Worker function; do not call directly. */
static
IRAtom* expr2vbits_LDle_WRK ( MCEnv* mce, IRType ty, IRAtom* addr, UInt bias )
{
   void*    helper;
   Char*    hname;
   IRDirty* di;
   IRTemp   datavbits;
   IRAtom*  addrAct;

   tl_assert(isOriginalAtom(mce,addr));

   /* First, emit a definedness test for the address.  This also sets
      the address (shadow) to 'defined' following the test. */
   complainIfUndefined( mce, addr );

   /* Now cook up a call to the relevant helper function, to read the
      data V bits from shadow memory. */
   ty = shadowType(ty);
   switch (ty) {
      case Ity_I64: helper = &MC_(helperc_LOADV8);
                    hname = "MC_(helperc_LOADV8)";
                    break;
      case Ity_I32: helper = &MC_(helperc_LOADV4);
                    hname = "MC_(helperc_LOADV4)";
                    break;
      case Ity_I16: helper = &MC_(helperc_LOADV2);
                    hname = "MC_(helperc_LOADV2)";
                    break;
      case Ity_I8:  helper = &MC_(helperc_LOADV1);
                    hname = "MC_(helperc_LOADV1)";
                    break;
      default:      ppIRType(ty);
                    VG_(tool_panic)("memcheck:do_shadow_LDle");
   }

   /* Generate the actual address into addrAct. */
   if (bias == 0) {
      addrAct = addr;
   } else {
      IROp    mkAdd;
      IRAtom* eBias;
      IRType  tyAddr = mce->hWordTy;
      tl_assert( tyAddr == Ity_I32 || tyAddr == Ity_I64 );
      mkAdd   = tyAddr==Ity_I32 ? Iop_Add32 : Iop_Add64;
      eBias   = tyAddr==Ity_I32 ? mkU32(bias) : mkU64(bias);
      addrAct = assignNew(mce, tyAddr, binop(mkAdd, addr, eBias) );
   }

   /* We need to have a place to park the V bits we're just about to
      read. */
   datavbits = newIRTemp(mce->bb->tyenv, ty);
   di = unsafeIRDirty_1_N( datavbits,
                           1/*regparms*/, hname, helper,
                           mkIRExprVec_1( addrAct ));
   setHelperAnns( mce, di );
   stmt( mce->bb, IRStmt_Dirty(di) );

   return mkexpr(datavbits);
}


static
IRAtom* expr2vbits_LDle ( MCEnv* mce, IRType ty, IRAtom* addr, UInt bias )
{
   IRAtom *v64hi, *v64lo;
   switch (shadowType(ty)) {
      case Ity_I8:
      case Ity_I16:
      case Ity_I32:
      case Ity_I64:
         return expr2vbits_LDle_WRK(mce, ty, addr, bias);
      case Ity_V128:
         v64lo = expr2vbits_LDle_WRK(mce, Ity_I64, addr, bias);
         v64hi = expr2vbits_LDle_WRK(mce, Ity_I64, addr, bias+8);
         return assignNew( mce,
                           Ity_V128,
                           binop(Iop_64HLtoV128, v64hi, v64lo));
      default:
         VG_(tool_panic)("expr2vbits_LDle");
   }
}
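
/* Note on the V128 case above: the two halves are fetched with
   separate 64-bit loads and recombined.  Since this is the
   little-endian path, the low 64 bits live at 'addr' and the high 64
   bits at 'addr+8', which is why the bias+8 load supplies the first
   (high) argument of Iop_64HLtoV128. */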
static
IRAtom* expr2vbits_Mux0X ( MCEnv* mce,
                           IRAtom* cond, IRAtom* expr0, IRAtom* exprX )
{
   IRAtom *vbitsC, *vbits0, *vbitsX;
   IRType ty;
   /* Given Mux0X(cond,expr0,exprX), generate
         Mux0X(cond,expr0#,exprX#) `UifU` PCast(cond#)
      That is, steer the V bits like the originals, but trash the
      result if the steering value is undefined.  This gives
      lazy propagation. */
   tl_assert(isOriginalAtom(mce, cond));
   tl_assert(isOriginalAtom(mce, expr0));
   tl_assert(isOriginalAtom(mce, exprX));

   vbitsC = expr2vbits(mce, cond);
   vbits0 = expr2vbits(mce, expr0);
   vbitsX = expr2vbits(mce, exprX);
   ty = typeOfIRExpr(mce->bb->tyenv, vbits0);

   return
      mkUifU(mce, ty, assignNew(mce, ty, IRExpr_Mux0X(cond, vbits0, vbitsX)),
                      mkPCastTo(mce, ty, vbitsC) );
}
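
/* For exposition: a scalar model of the Mux0X rule above, assuming
   the IR convention that expr0 is selected when the I8 condition is
   zero, and that PCast turns "any bit undefined" into "all bits
   undefined": */
#if 0
static UInt mux0x_vbits32 ( UChar cond, UInt vbits0, UInt vbitsX,
                            UChar vbitsC /* cond's V bits */ )
{
   UInt steered = cond == 0 ? vbits0 : vbitsX;   /* steer like the data */
   UInt trashed = vbitsC == 0 ? 0 : ~0u;         /* PCast(cond#) */
   return steered | trashed;                     /* `UifU` */
}
#endif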
/* --------- This is the main expression-handling function. --------- */

static
IRExpr* expr2vbits ( MCEnv* mce, IRExpr* e )
{
   switch (e->tag) {

      case Iex_Get:
         return shadow_GET( mce, e->Iex.Get.offset, e->Iex.Get.ty );

      case Iex_GetI:
         return shadow_GETI( mce, e->Iex.GetI.descr,
                                  e->Iex.GetI.ix, e->Iex.GetI.bias );

      case Iex_Tmp:
         return IRExpr_Tmp( findShadowTmp(mce, e->Iex.Tmp.tmp) );

      case Iex_Const:
         return definedOfType(shadowType(typeOfIRExpr(mce->bb->tyenv, e)));

      case Iex_Binop:
         return expr2vbits_Binop(
                   mce,
                   e->Iex.Binop.op,
                   e->Iex.Binop.arg1, e->Iex.Binop.arg2
                );

      case Iex_Unop:
         return expr2vbits_Unop( mce, e->Iex.Unop.op, e->Iex.Unop.arg );

      case Iex_LDle:
         return expr2vbits_LDle( mce, e->Iex.LDle.ty,
                                      e->Iex.LDle.addr, 0/*addr bias*/ );

      case Iex_CCall:
         return mkLazyN( mce, e->Iex.CCall.args,
                              e->Iex.CCall.retty,
                              e->Iex.CCall.cee );

      case Iex_Mux0X:
         return expr2vbits_Mux0X( mce, e->Iex.Mux0X.cond, e->Iex.Mux0X.expr0,
                                       e->Iex.Mux0X.exprX);

      default:
         VG_(printf)("\n");
         ppIRExpr(e);
         VG_(printf)("\n");
         VG_(tool_panic)("memcheck: expr2vbits");
   }
}

/*------------------------------------------------------------*/
/*--- Generate shadow stmts from all kinds of IRStmts.     ---*/
/*------------------------------------------------------------*/

/* Widen a value to the host word size. */

static
IRExpr* zwidenToHostWord ( MCEnv* mce, IRAtom* vatom )
{
   IRType ty, tyH;

   /* vatom is a vbits-value and as such can only have a shadow type. */
   tl_assert(isShadowAtom(mce,vatom));

   ty  = typeOfIRExpr(mce->bb->tyenv, vatom);
   tyH = mce->hWordTy;

   if (tyH == Ity_I32) {
      switch (ty) {
         case Ity_I32: return vatom;
         case Ity_I16: return assignNew(mce, tyH, unop(Iop_16Uto32, vatom));
         case Ity_I8:  return assignNew(mce, tyH, unop(Iop_8Uto32, vatom));
         default:      goto unhandled;
      }
   } else
   if (tyH == Ity_I64) {
      switch (ty) {
         case Ity_I32: return assignNew(mce, tyH, unop(Iop_32Uto64, vatom));
         case Ity_I16: return assignNew(mce, tyH, unop(Iop_32Uto64,
                              assignNew(mce, Ity_I32, unop(Iop_16Uto32, vatom))));
         case Ity_I8:  return assignNew(mce, tyH, unop(Iop_32Uto64,
                              assignNew(mce, Ity_I32, unop(Iop_8Uto32, vatom))));
         default:      goto unhandled;
      }
   } else {
      goto unhandled;
   }
  unhandled:
   VG_(printf)("\nty = "); ppIRType(ty); VG_(printf)("\n");
   VG_(tool_panic)("zwidenToHostWord");
}


/* Generate a shadow store.  addr is always the original address atom.
   You can pass in either originals or V-bits for the data atom, but
   obviously not both. */

static
void do_shadow_STle ( MCEnv* mce,
                      IRAtom* addr, UInt bias,
                      IRAtom* data, IRAtom* vdata )
{
   IROp     mkAdd;
   IRType   ty, tyAddr;
   IRDirty  *di, *diLo64, *diHi64;
   IRAtom   *addrAct, *addrLo64, *addrHi64;
   IRAtom   *vdataLo64, *vdataHi64;
   IRAtom   *eBias, *eBias0, *eBias8;
   void*    helper = NULL;
   Char*    hname = NULL;

   tyAddr = mce->hWordTy;
   mkAdd  = tyAddr==Ity_I32 ? Iop_Add32 : Iop_Add64;
   tl_assert( tyAddr == Ity_I32 || tyAddr == Ity_I64 );

   di = diLo64 = diHi64 = NULL;
   eBias = eBias0 = eBias8 = NULL;
   addrAct = addrLo64 = addrHi64 = NULL;
   vdataLo64 = vdataHi64 = NULL;

   if (data) {
      tl_assert(!vdata);
      tl_assert(isOriginalAtom(mce, data));
      tl_assert(bias == 0);
      vdata = expr2vbits( mce, data );
   } else {
      tl_assert(vdata);
   }

   tl_assert(isOriginalAtom(mce,addr));
   tl_assert(isShadowAtom(mce,vdata));

   ty = typeOfIRExpr(mce->bb->tyenv, vdata);

   /* First, emit a definedness test for the address.  This also sets
      the address (shadow) to 'defined' following the test. */
   complainIfUndefined( mce, addr );

   /* Now decide which helper function to call to write the data V
      bits into shadow memory. */
   switch (ty) {
      case Ity_V128: /* we'll use the helper twice */
      case Ity_I64: helper = &MC_(helperc_STOREV8);
                    hname = "MC_(helperc_STOREV8)";
                    break;
      case Ity_I32: helper = &MC_(helperc_STOREV4);
                    hname = "MC_(helperc_STOREV4)";
                    break;
      case Ity_I16: helper = &MC_(helperc_STOREV2);
                    hname = "MC_(helperc_STOREV2)";
                    break;
      case Ity_I8:  helper = &MC_(helperc_STOREV1);
                    hname = "MC_(helperc_STOREV1)";
                    break;
      default:      VG_(tool_panic)("memcheck:do_shadow_STle");
   }

   if (ty == Ity_V128) {

      /* V128-bit case */
      /* See comment in next clause re 64-bit regparms */
      eBias0    = tyAddr==Ity_I32 ? mkU32(bias) : mkU64(bias);
      addrLo64  = assignNew(mce, tyAddr, binop(mkAdd, addr, eBias0) );
      vdataLo64 = assignNew(mce, Ity_I64, unop(Iop_V128to64, vdata));
      diLo64    = unsafeIRDirty_0_N(
                     1/*regparms*/, hname, helper,
                     mkIRExprVec_2( addrLo64, vdataLo64 ));

      eBias8    = tyAddr==Ity_I32 ? mkU32(bias+8) : mkU64(bias+8);
      addrHi64  = assignNew(mce, tyAddr, binop(mkAdd, addr, eBias8) );
      vdataHi64 = assignNew(mce, Ity_I64, unop(Iop_V128HIto64, vdata));
      diHi64    = unsafeIRDirty_0_N(
                     1/*regparms*/, hname, helper,
                     mkIRExprVec_2( addrHi64, vdataHi64 ));

      setHelperAnns( mce, diLo64 );
      setHelperAnns( mce, diHi64 );
      stmt( mce->bb, IRStmt_Dirty(diLo64) );
      stmt( mce->bb, IRStmt_Dirty(diHi64) );

   } else {

      /* 8/16/32/64-bit cases */
      /* Generate the actual address into addrAct. */
      if (bias == 0) {
         addrAct = addr;
      } else {
         eBias   = tyAddr==Ity_I32 ? mkU32(bias) : mkU64(bias);
         addrAct = assignNew(mce, tyAddr, binop(mkAdd, addr, eBias) );
      }

      if (ty == Ity_I64) {
         /* We can't do this with regparm 2 on 32-bit platforms, since
            the back ends aren't clever enough to handle 64-bit
            regparm args.  Therefore be different. */
         di = unsafeIRDirty_0_N(
                 1/*regparms*/, hname, helper,
                 mkIRExprVec_2( addrAct, vdata ));
      } else {
         di = unsafeIRDirty_0_N(
                 2/*regparms*/, hname, helper,
                 mkIRExprVec_2( addrAct,
                                zwidenToHostWord( mce, vdata )));
      }
      setHelperAnns( mce, di );
      stmt( mce->bb, IRStmt_Dirty(di) );
   }

}
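
/* Note on the V128 case above: it mirrors the V128 load split -- low
   64 V bits to addr+bias, high 64 to addr+bias+8 -- and, like the
   plain 64-bit case, both calls are forced to regparms=1 because the
   32-bit back ends cannot pass a 64-bit regparm argument. */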
/* Do lazy pessimistic propagation through a dirty helper call, by
   looking at the annotations on it.  This is the most complex part of
   Memcheck. */

static IRType szToITy ( Int n )
{
   switch (n) {
      case 1: return Ity_I8;
      case 2: return Ity_I16;
      case 4: return Ity_I32;
      case 8: return Ity_I64;
      default: VG_(tool_panic)("szToITy(memcheck)");
   }
}

static
void do_shadow_Dirty ( MCEnv* mce, IRDirty* d )
{
   Int     i, n, offset, toDo, gSz, gOff;
   IRAtom  *src, *here, *curr;
   IRType  tyAddr, tySrc, tyDst;
   IRTemp  dst;

   /* First check the guard. */
   complainIfUndefined(mce, d->guard);

   /* Now round up all inputs and PCast over them. */
   curr = definedOfType(Ity_I32);

   /* Inputs: unmasked args */
   for (i = 0; d->args[i]; i++) {
      if (d->cee->mcx_mask & (1<<i)) {
         /* ignore this arg */
      } else {
         here = mkPCastTo( mce, Ity_I32, expr2vbits(mce, d->args[i]) );
         curr = mkUifU32(mce, here, curr);
      }
   }

   /* Inputs: guest state that we read. */
   for (i = 0; i < d->nFxState; i++) {
      tl_assert(d->fxState[i].fx != Ifx_None);
      if (d->fxState[i].fx == Ifx_Write)
         continue;

      /* Ignore any sections marked as 'always defined'. */
      if (isAlwaysDefd(mce, d->fxState[i].offset, d->fxState[i].size )) {
         if (0)
            VG_(printf)("memcheck: Dirty gst: ignored off %d, sz %d\n",
                        d->fxState[i].offset, d->fxState[i].size );
         continue;
      }

      /* This state element is read or modified.  So we need to
         consider it.  If larger than 8 bytes, deal with it in 8-byte
         chunks. */
      gSz  = d->fxState[i].size;
      gOff = d->fxState[i].offset;
      tl_assert(gSz > 0);
      while (True) {
         if (gSz == 0) break;
         n = gSz <= 8 ? gSz : 8;
         /* update 'curr' with UifU of the state slice
            gOff .. gOff+n-1 */
         tySrc = szToITy( n );
         src   = assignNew( mce, tySrc,
                            shadow_GET(mce, gOff, tySrc ) );
         here = mkPCastTo( mce, Ity_I32, src );
         curr = mkUifU32(mce, here, curr);
         gSz -= n;
         gOff += n;
      }

   }
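
   /* For exposition, the slicing performed by the loop above on a
      hypothetical 20-byte guest state region at offset 100:
         pass 1: gOff = 100, n = 8  ->  shadow_GET as Ity_I64
         pass 2: gOff = 108, n = 8  ->  shadow_GET as Ity_I64
         pass 3: gOff = 116, n = 4  ->  shadow_GET as Ity_I32
      Each slice is PCast-ed to Ity_I32 and UifU-ed into 'curr'. */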
   /* Inputs: memory.  First set up some info needed regardless of
      whether we're doing reads or writes. */
   tyAddr = Ity_INVALID;

   if (d->mFx != Ifx_None) {
      /* Because we may do multiple shadow loads/stores from the same
         base address, it's best to do a single test of its
         definedness right now.  Post-instrumentation optimisation
         should remove all but this test. */
      tl_assert(d->mAddr);
      complainIfUndefined(mce, d->mAddr);

      tyAddr = typeOfIRExpr(mce->bb->tyenv, d->mAddr);
      tl_assert(tyAddr == Ity_I32 || tyAddr == Ity_I64);
      tl_assert(tyAddr == mce->hWordTy); /* not really right */
   }

   /* Deal with memory inputs (reads or modifies) */
   if (d->mFx == Ifx_Read || d->mFx == Ifx_Modify) {
      offset = 0;
      toDo   = d->mSize;
      /* chew off 32-bit chunks */
      while (toDo >= 4) {
         here = mkPCastTo(
                   mce, Ity_I32,
                   expr2vbits_LDle ( mce, Ity_I32,
                                     d->mAddr, d->mSize - toDo )
                );
         curr = mkUifU32(mce, here, curr);
         toDo -= 4;
      }
      /* chew off 16-bit chunks */
      while (toDo >= 2) {
         here = mkPCastTo(
                   mce, Ity_I32,
                   expr2vbits_LDle ( mce, Ity_I16,
                                     d->mAddr, d->mSize - toDo )
                );
         curr = mkUifU32(mce, here, curr);
         toDo -= 2;
      }
      tl_assert(toDo == 0); /* also need to handle 1-byte excess */
   }

   /* Whew!  So curr is a 32-bit V-value summarising pessimistically
      all the inputs to the helper.  Now we need to re-distribute the
      results to all destinations. */

   /* Outputs: the destination temporary, if there is one. */
   if (d->tmp != IRTemp_INVALID) {
      dst   = findShadowTmp(mce, d->tmp);
      tyDst = typeOfIRTemp(mce->bb->tyenv, d->tmp);
      assign( mce->bb, dst, mkPCastTo( mce, tyDst, curr) );
   }

   /* Outputs: guest state that we write or modify. */
   for (i = 0; i < d->nFxState; i++) {
      tl_assert(d->fxState[i].fx != Ifx_None);
      if (d->fxState[i].fx == Ifx_Read)
         continue;
      /* Ignore any sections marked as 'always defined'. */
      if (isAlwaysDefd(mce, d->fxState[i].offset, d->fxState[i].size ))
         continue;
      /* This state element is written or modified.  So we need to
         consider it.  If larger than 8 bytes, deal with it in 8-byte
         chunks. */
      gSz  = d->fxState[i].size;
      gOff = d->fxState[i].offset;
      tl_assert(gSz > 0);
      while (True) {
         if (gSz == 0) break;
         n = gSz <= 8 ? gSz : 8;
         /* Write suitably-casted 'curr' to the state slice
            gOff .. gOff+n-1 */
         tyDst = szToITy( n );
         do_shadow_PUT( mce, gOff,
                             NULL, /* original atom */
                             mkPCastTo( mce, tyDst, curr ) );
         gSz -= n;
         gOff += n;
      }
   }

   /* Outputs: memory that we write or modify. */
   if (d->mFx == Ifx_Write || d->mFx == Ifx_Modify) {
      offset = 0;
      toDo   = d->mSize;
      /* chew off 32-bit chunks */
      while (toDo >= 4) {
         do_shadow_STle( mce, d->mAddr, d->mSize - toDo,
                         NULL, /* original data */
                         mkPCastTo( mce, Ity_I32, curr ) );
         toDo -= 4;
      }
      /* chew off 16-bit chunks */
      while (toDo >= 2) {
         do_shadow_STle( mce, d->mAddr, d->mSize - toDo,
                         NULL, /* original data */
                         mkPCastTo( mce, Ity_I16, curr ) );
         toDo -= 2;
      }
      tl_assert(toDo == 0); /* also need to handle 1-byte excess */
   }

}

/* We have an ABI hint telling us that [base .. base+len-1] is to
   become undefined ("writable").  Generate code to call a helper to
   notify the A/V bit machinery of this fact.

   We call
   void MC_(helperc_MAKE_STACK_UNINIT) ( Addr base, UWord len );
*/
static
void do_AbiHint ( MCEnv* mce, IRExpr* base, Int len )
{
   IRDirty* di;
   di = unsafeIRDirty_0_N(
           0/*regparms*/,
           "MC_(helperc_MAKE_STACK_UNINIT)",
           &MC_(helperc_MAKE_STACK_UNINIT),
           mkIRExprVec_2( base, mkIRExpr_HWord( (UInt)len) )
        );
   stmt( mce->bb, IRStmt_Dirty(di) );
}


/*------------------------------------------------------------*/
/*--- Memcheck main                                        ---*/
/*------------------------------------------------------------*/

static Bool isBogusAtom ( IRAtom* at )
{
   ULong n = 0;
   IRConst* con;
   tl_assert(isIRAtom(at));
   if (at->tag == Iex_Tmp)
      return False;
   tl_assert(at->tag == Iex_Const);
   con = at->Iex.Const.con;
   switch (con->tag) {
      case Ico_U1:   return False;
      case Ico_U8:   n = (ULong)con->Ico.U8; break;
      case Ico_U16:  n = (ULong)con->Ico.U16; break;
      case Ico_U32:  n = (ULong)con->Ico.U32; break;
      case Ico_U64:  n = (ULong)con->Ico.U64; break;
      case Ico_F64:  return False;
      case Ico_F64i: return False;
      case Ico_V128: return False;
      default: ppIRExpr(at); tl_assert(0);
   }
   /* VG_(printf)("%llx\n", n); */
   return (/*32*/    n == 0xFEFEFEFFULL
           /*32*/ || n == 0x80808080ULL
           /*64*/ || n == 0xFEFEFEFEFEFEFEFFULL
           /*64*/ || n == 0x8080808080808080ULL
           /*64*/ || n == 0x0101010101010101ULL
          );
}
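
/* Note: these constants are characteristic of word-at-a-time string
   scanning: 0xFEFEFEFF is the two's-complement negation of 0x01010101,
   and 0x80808080 masks the per-byte sign bits (likewise for the 64-bit
   and byte-replicated variants).  Code built on the classic "does this
   word contain a zero byte?" trick deliberately computes on partially
   defined words, which is presumably why such literals select the
   expensive instrumentation.  The trick itself, for reference: */
#if 0
static Bool has_zero_byte ( UInt w )
{
   /* w + 0xFEFEFEFFu == w - 0x01010101u (mod 2^32) */
   return ((w + 0xFEFEFEFFu) & ~w & 0x80808080u) != 0;
}
#endif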
static Bool checkForBogusLiterals ( /*FLAT*/ IRStmt* st )
{
   Int      i;
   IRExpr*  e;
   IRDirty* d;
   switch (st->tag) {
      case Ist_Tmp:
         e = st->Ist.Tmp.data;
         switch (e->tag) {
            case Iex_Get:
            case Iex_Tmp:
               return False;
            case Iex_Const:
               return isBogusAtom(e);
            case Iex_Unop:
               return isBogusAtom(e->Iex.Unop.arg);
            case Iex_GetI:
               return isBogusAtom(e->Iex.GetI.ix);
            case Iex_Binop:
               return isBogusAtom(e->Iex.Binop.arg1)
                      || isBogusAtom(e->Iex.Binop.arg2);
            case Iex_Mux0X:
               return isBogusAtom(e->Iex.Mux0X.cond)
                      || isBogusAtom(e->Iex.Mux0X.expr0)
                      || isBogusAtom(e->Iex.Mux0X.exprX);
            case Iex_LDle:
               return isBogusAtom(e->Iex.LDle.addr);
            case Iex_CCall:
               for (i = 0; e->Iex.CCall.args[i]; i++)
                  if (isBogusAtom(e->Iex.CCall.args[i]))
                     return True;
               return False;
            default:
               goto unhandled;
         }
      case Ist_Dirty:
         d = st->Ist.Dirty.details;
         for (i = 0; d->args[i]; i++)
            if (isBogusAtom(d->args[i]))
               return True;
         if (d->guard && isBogusAtom(d->guard))
            return True;
         if (d->mAddr && isBogusAtom(d->mAddr))
            return True;
         return False;
      case Ist_Put:
         return isBogusAtom(st->Ist.Put.data);
      case Ist_PutI:
         return isBogusAtom(st->Ist.PutI.ix)
                || isBogusAtom(st->Ist.PutI.data);
      case Ist_STle:
         return isBogusAtom(st->Ist.STle.addr)
                || isBogusAtom(st->Ist.STle.data);
      case Ist_Exit:
         return isBogusAtom(st->Ist.Exit.guard);
      case Ist_AbiHint:
         return isBogusAtom(st->Ist.AbiHint.base);
      case Ist_NoOp:
      case Ist_IMark:
      case Ist_MFence:
         return False;
      default:
      unhandled:
         ppIRStmt(st);
         VG_(tool_panic)("checkForBogusLiterals");
   }
}
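
/* For exposition: a flat binding such as

      t5 = Add32(t4,0x80808080:I32)

   makes checkForBogusLiterals return True, after which that statement
   and the rest of the BB are instrumented with the expensive Add/Sub
   and CmpEQ/CmpNE variants (see expr2vbits_Binop above). */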


IRBB* MC_(instrument) ( IRBB* bb_in, VexGuestLayout* layout,
                        IRType gWordTy, IRType hWordTy )
{
   Bool    verboze = False; //True;

   Int     i, j, first_stmt;
   IRStmt* st;
   MCEnv   mce;
   IRBB*   bb;

   if (gWordTy != hWordTy) {
      /* We don't currently support this case. */
      VG_(tool_panic)("host/guest word size mismatch");
   }

   /* Check we're not completely nuts */
   tl_assert(sizeof(UWord) == sizeof(void*));
   tl_assert(sizeof(Word)  == sizeof(void*));
   tl_assert(sizeof(ULong) == 8);
   tl_assert(sizeof(Long)  == 8);
   tl_assert(sizeof(UInt)  == 4);
   tl_assert(sizeof(Int)   == 4);

   /* Set up BB */
   bb           = emptyIRBB();
   bb->tyenv    = dopyIRTypeEnv(bb_in->tyenv);
   bb->next     = dopyIRExpr(bb_in->next);
   bb->jumpkind = bb_in->jumpkind;

   /* Set up the running environment.  Only .bb is modified as we go
      along. */
   mce.bb             = bb;
   mce.layout         = layout;
   mce.n_originalTmps = bb->tyenv->types_used;
   mce.hWordTy        = hWordTy;
   mce.bogusLiterals  = False;
   mce.tmpMap         = LibVEX_Alloc(mce.n_originalTmps * sizeof(IRTemp));
   for (i = 0; i < mce.n_originalTmps; i++)
      mce.tmpMap[i] = IRTemp_INVALID;

   /* Iterate over the stmts. */

   for (i = 0; i < bb_in->stmts_used; i++) {
      st = bb_in->stmts[i];
      tl_assert(st);

      tl_assert(isFlatIRStmt(st));

      if (!mce.bogusLiterals) {
         mce.bogusLiterals = checkForBogusLiterals(st);
         if (0 && mce.bogusLiterals) {
            VG_(printf)("bogus: ");
            ppIRStmt(st);
            VG_(printf)("\n");
         }
      }

      first_stmt = bb->stmts_used;

      if (verboze) {
         ppIRStmt(st);
         VG_(printf)("\n\n");
      }

      /* Generate instrumentation code for each stmt ... */

      switch (st->tag) {

         case Ist_Tmp:
            assign( bb, findShadowTmp(&mce, st->Ist.Tmp.tmp),
                        expr2vbits( &mce, st->Ist.Tmp.data) );
            break;

         case Ist_Put:
            do_shadow_PUT( &mce,
                           st->Ist.Put.offset,
                           st->Ist.Put.data,
                           NULL /* shadow atom */ );
            break;

         case Ist_PutI:
            do_shadow_PUTI( &mce,
                            st->Ist.PutI.descr,
                            st->Ist.PutI.ix,
                            st->Ist.PutI.bias,
                            st->Ist.PutI.data );
            break;

         case Ist_STle:
            do_shadow_STle( &mce, st->Ist.STle.addr, 0/* addr bias */,
                                  st->Ist.STle.data,
                                  NULL /* shadow data */ );
            break;

         case Ist_Exit:
            complainIfUndefined( &mce, st->Ist.Exit.guard );
            break;

         case Ist_NoOp:
         case Ist_IMark:
         case Ist_MFence:
            break;

         case Ist_Dirty:
            do_shadow_Dirty( &mce, st->Ist.Dirty.details );
            break;

         case Ist_AbiHint:
            do_AbiHint( &mce, st->Ist.AbiHint.base, st->Ist.AbiHint.len );
            break;

         default:
            VG_(printf)("\n");
            ppIRStmt(st);
            VG_(printf)("\n");
            VG_(tool_panic)("memcheck: unhandled IRStmt");

      } /* switch (st->tag) */

      if (verboze) {
         for (j = first_stmt; j < bb->stmts_used; j++) {
            VG_(printf)("   ");
            ppIRStmt(bb->stmts[j]);
            VG_(printf)("\n");
         }
         VG_(printf)("\n");
      }

      /* ... and finally copy the stmt itself to the output. */
      addStmtToIRBB(bb, st);

   }

   /* Now we need to complain if the jump target is undefined. */
   first_stmt = bb->stmts_used;

   if (verboze) {
      VG_(printf)("bb->next = ");
      ppIRExpr(bb->next);
      VG_(printf)("\n\n");
   }

   complainIfUndefined( &mce, bb->next );

   if (verboze) {
      for (j = first_stmt; j < bb->stmts_used; j++) {
         VG_(printf)("   ");
         ppIRStmt(bb->stmts[j]);
         VG_(printf)("\n");
      }
      VG_(printf)("\n");
   }

   return bb;
}

/*--------------------------------------------------------------------*/
/*--- end                                           mc_translate.c ---*/
/*--------------------------------------------------------------------*/